From 95d364ccd9f4650b46e5e308b38d449720a88a91 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 22 Jul 2020 16:24:42 +0200 Subject: Upgrade expat to 2.1.0 --- libxsde/xsde/c/expat/ascii.h | 7 + libxsde/xsde/c/expat/config.h | 4 +- libxsde/xsde/c/expat/expat.h | 38 +- libxsde/xsde/c/expat/expat_external.h | 6 +- libxsde/xsde/c/expat/internal.h | 2 +- libxsde/xsde/c/expat/xmlparse.c | 628 +++++++++++++++++++++++----------- libxsde/xsde/c/expat/xmlrole.c | 4 + libxsde/xsde/c/expat/xmltok.c | 127 +++++-- libxsde/xsde/c/expat/xmltok.h | 10 +- libxsde/xsde/c/expat/xmltok_impl.c | 70 ++-- libxsde/xsde/c/expat/xmltok_ns.c | 9 + 11 files changed, 631 insertions(+), 274 deletions(-) (limited to 'libxsde') diff --git a/libxsde/xsde/c/expat/ascii.h b/libxsde/xsde/c/expat/ascii.h index 337e5bb..d10530b 100644 --- a/libxsde/xsde/c/expat/ascii.h +++ b/libxsde/xsde/c/expat/ascii.h @@ -83,3 +83,10 @@ #define ASCII_LSQB 0x5B #define ASCII_RSQB 0x5D #define ASCII_UNDERSCORE 0x5F +#define ASCII_LPAREN 0x28 +#define ASCII_RPAREN 0x29 +#define ASCII_FF 0x0C +#define ASCII_SLASH 0x2F +#define ASCII_HASH 0x23 +#define ASCII_PIPE 0x7C +#define ASCII_COMMA 0x2C diff --git a/libxsde/xsde/c/expat/config.h b/libxsde/xsde/c/expat/config.h index 41c8fdf..788256c 100644 --- a/libxsde/xsde/c/expat/config.h +++ b/libxsde/xsde/c/expat/config.h @@ -3,7 +3,6 @@ #include - #if XSDE_BYTEORDER == 1234 # define BYTEORDER 1234 #elif XSDE_BYTEORDER == 4321 @@ -12,10 +11,10 @@ # error XSDE_BYTEORDER is not defined or defined to an invalid value #endif - #define XML_NS 1 #define XML_DTD 1 #define XML_CONTEXT_BYTES 1024 +#define XML_FREESTANDING 1 #define UNUSED(x) (void)x; @@ -29,6 +28,7 @@ #undef WIN32_LEAN_AND_MEAN #define HAVE_MEMMOVE 1 +#define COMPILED_FROM_DSP 1 #endif /* XSDE_PLATFORM_WIN32 || XSDE_PLATFORM_WINCE */ diff --git a/libxsde/xsde/c/expat/expat.h b/libxsde/xsde/c/expat/expat.h index 2dd29ff..f61d3fe 100644 --- a/libxsde/xsde/c/expat/expat.h +++ b/libxsde/xsde/c/expat/expat.h @@ -742,6 +742,29 @@ XML_GetSpecifiedAttributeCount(XML_Parser parser); XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); +#ifdef XML_ATTR_INFO +/* Source file byte offsets for the start and end of attribute names and values. + The value indices are exclusive of surrounding quotes; thus in a UTF-8 source + file an attribute value of "blah" will yield: + info->valueEnd - info->valueStart = 4 bytes. +*/ +typedef struct { + XML_Index nameStart; /* Offset to beginning of the attribute name. */ + XML_Index nameEnd; /* Offset after the attribute name's last byte. */ + XML_Index valueStart; /* Offset to beginning of the attribute value. */ + XML_Index valueEnd; /* Offset after the attribute value's last byte. */ +} XML_AttrInfo; + +/* Returns an array of XML_AttrInfo structures for the attribute/value pairs + passed in last call to the XML_StartElementHandler that were specified + in the start-tag rather than defaulted. Each attribute/value pair counts + as 1; thus the number of entries in the array is + XML_GetSpecifiedAttributeCount(parser) / 2. +*/ +XMLPARSEAPI(const XML_AttrInfo *) +XML_GetAttributeInfo(XML_Parser parser); +#endif + /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is detected. The last call to XML_Parse must have isFinal true; len may be zero for this call (or any other). @@ -883,6 +906,15 @@ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); +/* Sets the hash salt to use for internal hash calculations. + Helps in preventing DoS attacks based on predicting hash + function behavior. This must be called before parsing is started. + Returns 1 if successful, 0 when called after parsing has started. +*/ +XMLPARSEAPI(int) +XML_SetHashSalt(XML_Parser parser, + unsigned long hash_salt); + /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. */ @@ -983,7 +1015,9 @@ enum XML_FeatureEnum { XML_FEATURE_MIN_SIZE, XML_FEATURE_SIZEOF_XML_CHAR, XML_FEATURE_SIZEOF_XML_LCHAR, - XML_FEATURE_NS + XML_FEATURE_NS, + XML_FEATURE_LARGE_SIZE, + XML_FEATURE_ATTR_INFO /* Additional features must be added to the end of this enum. */ }; @@ -1003,7 +1037,7 @@ XML_GetFeatureList(void); change to major or minor version. */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 0 +#define XML_MINOR_VERSION 1 #define XML_MICRO_VERSION 0 #ifdef __cplusplus diff --git a/libxsde/xsde/c/expat/expat_external.h b/libxsde/xsde/c/expat/expat_external.h index ea2279e..d54c2f2 100644 --- a/libxsde/xsde/c/expat/expat_external.h +++ b/libxsde/xsde/c/expat/expat_external.h @@ -38,9 +38,9 @@ system headers may assume the cdecl convention. */ #ifndef XMLCALL -#if defined(XML_USE_MSC_EXTENSIONS) +#if defined(_MSC_VER) #define XMLCALL __cdecl -#elif defined(__GNUC__) && defined(__i386) +#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER) #define XMLCALL __attribute__((cdecl)) #else /* For any platform which uses this definition and supports more than @@ -101,7 +101,7 @@ typedef char XML_LChar; #ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ #if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400 -typedef __int64 XML_Index; +typedef __int64 XML_Index; typedef unsigned __int64 XML_Size; #else typedef long long XML_Index; diff --git a/libxsde/xsde/c/expat/internal.h b/libxsde/xsde/c/expat/internal.h index ff056c6..dd54548 100644 --- a/libxsde/xsde/c/expat/internal.h +++ b/libxsde/xsde/c/expat/internal.h @@ -20,7 +20,7 @@ and therefore subject to change. */ -#if defined(__GNUC__) && defined(__i386__) +#if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__) /* We'll use this version by default only where we know it helps. regparm() generates warnings on Solaris boxes. See SF bug #692878. diff --git a/libxsde/xsde/c/expat/xmlparse.c b/libxsde/xsde/c/expat/xmlparse.c index 031e1a8..167ed0a 100644 --- a/libxsde/xsde/c/expat/xmlparse.c +++ b/libxsde/xsde/c/expat/xmlparse.c @@ -7,11 +7,23 @@ #include #include /* memset(), memcpy() */ #include +#include /* UINT_MAX */ + +#ifndef XML_FREESTANDING +#ifdef COMPILED_FROM_DSP +#define getpid GetCurrentProcessId +#else +#include /* gettimeofday() */ +#include /* getpid() */ +#include /* getpid() */ +#endif +#endif #define XML_BUILDING_EXPAT 1 #include +#include #include #ifdef XSDE_CUSTOM_ALLOCATOR @@ -24,7 +36,8 @@ #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS #define XmlEncode XmlUtf16Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) +/* Using pointer subtraction to convert to integer type. */ +#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1)) typedef unsigned short ICHAR; #else #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX @@ -238,7 +251,7 @@ typedef struct { typedef struct { unsigned long version; - unsigned long hash; + size_t hash; const XML_Char *uriName; } NS_ATT; @@ -323,7 +336,7 @@ initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, - XML_Bool haveMore); + XML_Bool haveMore, XML_Bool allowClosingDoctype); static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl); @@ -385,12 +398,13 @@ static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); +dtdCopy(XML_Parser oldParser, + DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); static int -copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); - +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize); +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); static void FASTCALL hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); static void FASTCALL hashTableClear(HASH_TABLE *); @@ -423,11 +437,15 @@ static ELEMENT_TYPE * getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); +static size_t generate_hash_secret_salt(XML_Parser parser); +static XML_Bool startParsing(XML_Parser parser); + static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, DTD *dtd); + static void parserInit(XML_Parser parser, const XML_Char *encodingName); @@ -527,6 +545,9 @@ struct XML_ParserStruct { NS_ATT *m_nsAtts; unsigned long m_nsAttsVersion; unsigned char m_nsAttsPower; +#ifdef XML_ATTR_INFO + XML_AttrInfo *m_attInfo; +#endif POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; @@ -540,6 +561,7 @@ struct XML_ParserStruct { XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; #endif + size_t m_hash_secret_salt; }; #define MALLOC(s) (parser->m_mem.malloc_fcn((s))) @@ -634,6 +656,7 @@ struct XML_ParserStruct { #define nsAtts (parser->m_nsAtts) #define nsAttsVersion (parser->m_nsAttsVersion) #define nsAttsPower (parser->m_nsAttsPower) +#define attInfo (parser->m_attInfo) #define tempPool (parser->m_tempPool) #define temp2Pool (parser->m_temp2Pool) #define groupConnector (parser->m_groupConnector) @@ -647,6 +670,7 @@ struct XML_ParserStruct { #define useForeignDTD (parser->m_useForeignDTD) #define paramEntityParsing (parser->m_paramEntityParsing) #endif /* XML_DTD */ +#define hash_secret_salt (parser->m_hash_secret_salt) XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) @@ -663,28 +687,76 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) } static const XML_Char implicitContext[] = { - 'x', 'm', 'l', '=', 'h', 't', 't', 'p', ':', '/', '/', - 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', - 'X', 'M', 'L', '/', '1', '9', '9', '8', '/', - 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0' + ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, + ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, + ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, + ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, + ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, + ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0' }; +#ifndef XML_FREESTANDING +static size_t +gather_time_entropy(void) +{ +#ifdef COMPILED_FROM_DSP + FILETIME ft; + GetSystemTimeAsFileTime(&ft); /* never fails */ + return ft.dwHighDateTime ^ ft.dwLowDateTime; +#else + struct timeval tv; + int gettimeofday_res; + + gettimeofday_res = gettimeofday(&tv, NULL); + assert (gettimeofday_res == 0); + + /* Microseconds time is <20 bits entropy */ + return tv.tv_usec; +#endif +} +#endif + +static size_t +generate_hash_secret_salt(XML_Parser parser) +{ +#ifdef XML_FREESTANDING + const size_t entropy = (size_t)&parser ^ (size_t)parser; +#else + /* Process ID is 0 bits entropy if attacker has local access + * XML_Parser address is few bits of entropy if attacker has local access */ + const size_t entropy = + gather_time_entropy() ^ getpid() ^ (size_t)parser; +#endif + + /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ + if (sizeof(size_t) == 4) { + return entropy * (size_t)2147483647; + } else { + return entropy * (size_t)2305843009213693951; + } +} + +static XML_Bool /* only valid for root parser */ +startParsing(XML_Parser parser) +{ + /* hash functions must be initialized before setContext() is called */ + if (hash_secret_salt == 0) + hash_secret_salt = generate_hash_secret_salt(parser); + if (ns) { + /* implicit context only set for root parser, since child + parsers (i.e. external entity parsers) will inherit it + */ + return setContext(parser, implicitContext); + } + return XML_TRUE; +} + XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - XML_Parser parser = parserCreate(encodingName, memsuite, nameSep, NULL); - if (parser != NULL && ns) { - /* implicit context only set for root parser, since child - parsers (i.e. external entity parsers) will inherit it - */ - if (!setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return NULL; - } - } - return parser; + return parserCreate(encodingName, memsuite, nameSep, NULL); } static XML_Parser @@ -740,9 +812,20 @@ parserCreate(const XML_Char *encodingName, FREE(parser); return NULL; } +#ifdef XML_ATTR_INFO + attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo)); + if (attInfo == NULL) { + FREE(atts); + FREE(parser); + return NULL; + } +#endif dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); if (dataBuf == NULL) { FREE(atts); +#ifdef XML_ATTR_INFO + FREE(attInfo); +#endif FREE(parser); return NULL; } @@ -755,6 +838,9 @@ parserCreate(const XML_Char *encodingName, if (_dtd == NULL) { FREE(dataBuf); FREE(atts); +#ifdef XML_ATTR_INFO + FREE(attInfo); +#endif FREE(parser); return NULL; } @@ -770,7 +856,7 @@ parserCreate(const XML_Char *encodingName, unknownEncodingHandler = NULL; unknownEncodingHandlerData = NULL; - namespaceSeparator = '!'; + namespaceSeparator = ASCII_EXCL; ns = XML_FALSE; ns_triplets = XML_FALSE; @@ -869,6 +955,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) useForeignDTD = XML_FALSE; paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif + hash_secret_salt = 0; } /* moves list of bindings to freeBindingList */ @@ -916,7 +1003,7 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) poolClear(&temp2Pool); parserInit(parser, encodingName); dtdReset(_dtd, &parser->m_mem); - return setContext(parser, implicitContext); + return XML_TRUE; } enum XML_Status XMLCALL @@ -985,6 +1072,12 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, int oldInEntityValue = prologState.inEntityValue; #endif XML_Bool oldns_triplets = ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + size_t oldhash_secret_salt = hash_secret_salt; #ifdef XML_DTD if (!context) @@ -1038,13 +1131,14 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; ns_triplets = oldns_triplets; + hash_secret_salt = oldhash_secret_salt; parentParser = oldParser; #ifdef XML_DTD paramEntityParsing = oldParamEntityParsing; prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(_dtd, oldDtd, &parser->m_mem) + if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem) || !setContext(parser, context)) { XML_ParserFree(parser); return NULL; @@ -1133,6 +1227,9 @@ XML_ParserFree(XML_Parser parser) #endif /* XML_DTD */ dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem); FREE((void *)atts); +#ifdef XML_ATTR_INFO + FREE((void *)attInfo); +#endif FREE(groupConnector); FREE(buffer); FREE(dataBuf); @@ -1213,6 +1310,14 @@ XML_GetIdAttributeIndex(XML_Parser parser) return idAttIndex; } +#ifdef XML_ATTR_INFO +const XML_AttrInfo * XMLCALL +XML_GetAttributeInfo(XML_Parser parser) +{ + return attInfo; +} +#endif + void XMLCALL XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, @@ -1429,6 +1534,17 @@ XML_SetParamEntityParsing(XML_Parser parser, #endif } +int XMLCALL +XML_SetHashSalt(XML_Parser parser, + unsigned long hash_salt) /* should be size_t */ +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return 0; + hash_secret_salt = hash_salt; + return 1; +} + enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { @@ -1439,6 +1555,11 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) case XML_FINISHED: errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parentParser == NULL && !startParsing(parser)) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } default: ps_parsing = XML_PARSING; } @@ -1497,11 +1618,13 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) break; case XML_INITIALIZED: case XML_PARSING: - result = XML_STATUS_OK; if (isFinal) { ps_parsing = XML_FINISHED; - return result; + return XML_STATUS_OK; } + /* fall through */ + default: + result = XML_STATUS_OK; } } @@ -1509,23 +1632,22 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) nLeftOver = s + len - end; if (nLeftOver) { if (buffer == NULL || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - char *temp; - temp = (buffer == NULL - ? (char *)MALLOC(len * 2) - : (char *)REALLOC(buffer, len * 2)); - if (temp == NULL) { - errorCode = XML_ERROR_NO_MEMORY; - return XML_STATUS_ERROR; + /* avoid _signed_ integer overflow */ + char *temp = NULL; + const int bytesToAllocate = (int)((unsigned)len * 2U); + if (bytesToAllocate > 0) { + temp = (buffer == NULL + ? (char *)MALLOC(bytesToAllocate) + : (char *)REALLOC(buffer, bytesToAllocate)); } - buffer = temp; - if (!buffer) { + if (temp == NULL) { errorCode = XML_ERROR_NO_MEMORY; eventPtr = eventEndPtr = NULL; processor = errorProcessor; return XML_STATUS_ERROR; } - bufferLim = buffer + len * 2; + buffer = temp; + bufferLim = buffer + bytesToAllocate; } memcpy(buffer, end, nLeftOver); } @@ -1562,6 +1684,11 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) case XML_FINISHED: errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parentParser == NULL && !startParsing(parser)) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } default: ps_parsing = XML_PARSING; } @@ -1603,6 +1730,12 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) void * XMLCALL XML_GetBuffer(XML_Parser parser, int len) { +/* BEGIN MOZILLA CHANGE (sanity check len) */ + if (len < 0) { + errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +/* END MOZILLA CHANGE */ switch (ps_parsing) { case XML_SUSPENDED: errorCode = XML_ERROR_SUSPENDED; @@ -1614,8 +1747,14 @@ XML_GetBuffer(XML_Parser parser, int len) } if (len > bufferLim - bufferEnd) { - /* FIXME avoid integer overflow */ - int neededSize = len + (int)(bufferEnd - bufferPtr); + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr)); +/* BEGIN MOZILLA CHANGE (sanity check neededSize) */ + if (neededSize < 0) { + errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +/* END MOZILLA CHANGE */ #ifdef XML_CONTEXT_BYTES int keep = (int)(bufferPtr - buffer); @@ -1643,8 +1782,17 @@ XML_GetBuffer(XML_Parser parser, int len) if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; - } while (bufferSize < neededSize); + /* Do not invoke signed arithmetic overflow: */ + bufferSize = (int) (2U * (unsigned) bufferSize); +/* BEGIN MOZILLA CHANGE (prevent infinite loop on overflow) */ + } while (bufferSize < neededSize && bufferSize > 0); +/* END MOZILLA CHANGE */ +/* BEGIN MOZILLA CHANGE (sanity check bufferSize) */ + if (bufferSize <= 0) { + errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +/* END MOZILLA CHANGE */ newBuf = (char *)MALLOC(bufferSize); if (newBuf == 0) { errorCode = XML_ERROR_NO_MEMORY; @@ -1675,6 +1823,8 @@ XML_GetBuffer(XML_Parser parser, int len) bufferPtr = buffer = newBuf; #endif /* not defined XML_CONTEXT_BYTES */ } + eventPtr = eventEndPtr = NULL; + positionPtr = NULL; } return bufferEnd; } @@ -1764,7 +1914,7 @@ XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser parser) { if (eventPtr) - return parseEndByteIndex - (parseEndPtr - eventPtr); + return (XML_Index) (parseEndByteIndex - (parseEndPtr - eventPtr)); return -1; } @@ -1957,6 +2107,12 @@ XML_GetFeatureList(void) #ifdef XML_NS {XML_FEATURE_NS, XML_L("XML_NS"), 0}, #endif +#ifdef XML_LARGE_SIZE + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, +#endif +#ifdef XML_ATTR_INFO + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif {XML_FEATURE_END, NULL, 0} }; @@ -2240,7 +2396,7 @@ doContent(XML_Parser parser, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&dtd->pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, @@ -2332,11 +2488,11 @@ doContent(XML_Parser parser, for (;;) { int bufSize; int convLen; - XmlConvert(enc, + const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); convLen = (int)(toPtr - (XML_Char *)tag->buf); - if (fromPtr == rawNameEnd) { + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } @@ -2551,26 +2707,29 @@ doContent(XML_Parser parser, *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_DATA_CHARS: - if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = s; - characterDataHandler(handlerArg, dataBuf, - (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) - break; - *eventPP = s; + { + XML_CharacterDataHandler charDataHandler = characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = s; + charDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) + break; + *eventPP = s; + } } + else + charDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); + else if (defaultHandler) + reportDefault(parser, enc, s, next); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) @@ -2627,12 +2786,12 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const XML_Char *localPart; /* lookup the element type name */ - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, tagNamePtr->str,0); + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0); if (!elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); if (!name) return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, name, + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (!elementType) return XML_ERROR_NO_MEMORY; @@ -2646,23 +2805,44 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (n + nDefaultAtts > attsSize) { int oldAttsSize = attsSize; ATTRIBUTE *temp; +#ifdef XML_ATTR_INFO + XML_AttrInfo *temp2; +#endif attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; atts = temp; +#ifdef XML_ATTR_INFO + temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) + return XML_ERROR_NO_MEMORY; + attInfo = temp2; +#endif if (n > oldAttsSize) XmlGetAttributes(enc, attStr, n, atts); } appAtts = (const XML_Char **)atts; for (i = 0; i < n; i++) { + ATTRIBUTE *currAtt = &atts[i]; +#ifdef XML_ATTR_INFO + XML_AttrInfo *currAttInfo = &attInfo[i]; +#endif /* add the name and value to the attribute list */ - ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, - atts[i].name - + XmlNameLength(enc, atts[i].name)); + ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name, + currAtt->name + + XmlNameLength(enc, currAtt->name)); if (!attId) return XML_ERROR_NO_MEMORY; +#ifdef XML_ATTR_INFO + currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name); + currAttInfo->nameEnd = currAttInfo->nameStart + + XmlNameLength(enc, currAtt->name); + currAttInfo->valueStart = parseEndByteIndex - + (parseEndPtr - currAtt->valuePtr); + currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd); +#endif /* Detect duplicate attributes by their QNames. This does not work when namespace processing is turned on and different prefixes for the same namespace are used. For this case we have a check further down. @@ -2770,7 +2950,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - int j; /* hash table index */ + size_t j; /* hash table index */ unsigned long version = nsAttsVersion; int nsAttsSize = (int)1 << nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ @@ -2801,21 +2981,21 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; const BINDING *b; - unsigned long uriHash = 0; + size_t uriHash = hash_secret_salt; ((XML_Char *)s)[-1] = 0; /* clear flag */ - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, s, 0); + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); b = id->prefix->binding; if (!b) return XML_ERROR_UNBOUND_PREFIX; /* as we expand the name we also calculate its hash value */ - for (j = 0; j < b->uriLen; j++) { + for (j = 0; j < (size_t)b->uriLen; j++) { const XML_Char c = b->uri[j]; if (!poolAppendChar(&tempPool, c)) return XML_ERROR_NO_MEMORY; uriHash = CHAR_HASH(uriHash, c); } - while (*s++ != XML_T(':')) + while (*s++ != XML_T(ASCII_COLON)) ; do { /* copies null terminator */ const XML_Char c = *s; @@ -2825,10 +3005,10 @@ storeAtts(XML_Parser parser, const ENCODING *enc, } while (*s++); { /* Check hash table for duplicate of expanded name (uriName). - Derived from code in lookup(HASH_TABLE *table, ...). + Derived from code in lookup(parser, HASH_TABLE *table, ...). */ unsigned char step = 0; - unsigned long mask = nsAttsSize - 1; + size_t mask = nsAttsSize - 1; j = uriHash & mask; /* index into hash table */ while (nsAtts[j].version == version) { /* for speed we compare stored hash values first */ @@ -2889,7 +3069,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (!binding) return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; - while (*localPart++ != XML_T(':')) + while (*localPart++ != XML_T(ASCII_COLON)) ; } else if (dtd->defaultPrefix.binding) { @@ -2944,17 +3124,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) { static const XML_Char xmlNamespace[] = { - 'h', 't', 't', 'p', ':', '/', '/', - 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', - 'X', 'M', 'L', '/', '1', '9', '9', '8', '/', - 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0' + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, + ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, + ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, + ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, + ASCII_e, '\0' }; static const int xmlLen = (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1; static const XML_Char xmlnsNamespace[] = { - 'h', 't', 't', 'p', ':', '/', '/', - 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', - '2', '0', '0', '0', '/', 'x', 'm', 'l', 'n', 's', '/', '\0' + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, + ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0, + ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s, + ASCII_SLASH, '\0' }; static const int xmlnsLen = (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1; @@ -2971,13 +3155,13 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, return XML_ERROR_UNDECLARING_PREFIX; if (prefix->name - && prefix->name[0] == XML_T('x') - && prefix->name[1] == XML_T('m') - && prefix->name[2] == XML_T('l')) { + && prefix->name[0] == XML_T(ASCII_x) + && prefix->name[1] == XML_T(ASCII_m) + && prefix->name[2] == XML_T(ASCII_l)) { /* Not allowed to bind xmlns */ - if (prefix->name[3] == XML_T('n') - && prefix->name[4] == XML_T('s') + if (prefix->name[3] == XML_T(ASCII_n) + && prefix->name[4] == XML_T(ASCII_s) && prefix->name[5] == XML_T('\0')) return XML_ERROR_RESERVED_PREFIX_XMLNS; @@ -3131,26 +3315,29 @@ doCdataSection(XML_Parser parser, reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: - if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = next; - characterDataHandler(handlerArg, dataBuf, - (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) - break; - *eventPP = s; + { + XML_CharacterDataHandler charDataHandler = characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = next; + charDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) + break; + *eventPP = s; + } } + else + charDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); + else if (defaultHandler) + reportDefault(parser, enc, s, next); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); break; case XML_TOK_INVALID: *eventPP = next; @@ -3530,6 +3717,14 @@ entityValueInitProcessor(XML_Parser parser, *nextPtr = next; return XML_ERROR_NONE; } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with + "paramEntities, + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!declEntity) @@ -3768,15 +3972,17 @@ doProlog(XML_Parser parser, #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; if (startDoctypeDeclHandler) { + XML_Char *pubId; if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; - doctypePubid = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!doctypePubid) + pubId = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!pubId) return XML_ERROR_NO_MEMORY; - normalizePublicId((XML_Char *)doctypePubid); + normalizePublicId(pubId); poolFinish(&tempPool); + doctypePubid = pubId; handleDefault = XML_FALSE; goto alreadyChecked; } @@ -3800,6 +4006,11 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_DOCTYPE_CLOSE: + if (allowClosingDoctype != XML_TRUE) { + /* Must not close doctype from within expanded parameter entities */ + return XML_ERROR_INVALID_TOKEN; + } + if (doctypeName) { startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, doctypePubid, 0); @@ -3815,7 +4026,8 @@ doProlog(XML_Parser parser, XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + ENTITY *entity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!entity) @@ -3859,7 +4071,7 @@ doProlog(XML_Parser parser, XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!entity) @@ -3959,11 +4171,11 @@ doProlog(XML_Parser parser, 0, parser)) return XML_ERROR_NO_MEMORY; if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T('(') - || (*declAttributeType == XML_T('N') - && declAttributeType[1] == XML_T('O'))) { + if (*declAttributeType == XML_T(ASCII_LPAREN) + || (*declAttributeType == XML_T(ASCII_N) + && declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(')')) + if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) || !poolAppendChar(&tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; declAttributeType = tempPool.start; @@ -3996,11 +4208,11 @@ doProlog(XML_Parser parser, declAttributeIsCdata, XML_FALSE, attVal, parser)) return XML_ERROR_NO_MEMORY; if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T('(') - || (*declAttributeType == XML_T('N') - && declAttributeType[1] == XML_T('O'))) { + if (*declAttributeType == XML_T(ASCII_LPAREN) + || (*declAttributeType == XML_T(ASCII_N) + && declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(')')) + if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) || !poolAppendChar(&tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; declAttributeType = tempPool.start; @@ -4073,7 +4285,8 @@ doProlog(XML_Parser parser, break; #else /* XML_DTD */ if (!declEntity) { - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!declEntity) @@ -4148,7 +4361,7 @@ doProlog(XML_Parser parser, const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->generalEntities, name, + declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; @@ -4180,7 +4393,7 @@ doProlog(XML_Parser parser, const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; @@ -4327,14 +4540,14 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == '|') + if (groupConnector[prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; - groupConnector[prologState.level] = ','; + groupConnector[prologState.level] = ASCII_COMMA; if (dtd->in_eldecl && elementDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ',') + if (groupConnector[prologState.level] == ASCII_COMMA) return XML_ERROR_SYNTAX; if (dtd->in_eldecl && !groupConnector[prologState.level] @@ -4346,7 +4559,7 @@ doProlog(XML_Parser parser, if (elementDeclHandler) handleDefault = XML_FALSE; } - groupConnector[prologState.level] = '|'; + groupConnector[prologState.level] = ASCII_PIPE; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD @@ -4362,7 +4575,7 @@ doProlog(XML_Parser parser, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&dtd->pool); /* first, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, @@ -4699,7 +4912,7 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, if (entity->is_param) { int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); result = doProlog(parser, internalEncoding, textStart, textEnd, tok, - next, &next, XML_FALSE); + next, &next, XML_FALSE, XML_FALSE); } else #endif /* XML_DTD */ @@ -4744,7 +4957,7 @@ internalEntityProcessor(XML_Parser parser, if (entity->is_param) { int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); result = doProlog(parser, internalEncoding, textStart, textEnd, tok, - next, &next, XML_FALSE); + next, &next, XML_FALSE, XML_TRUE); } else #endif /* XML_DTD */ @@ -4771,7 +4984,7 @@ internalEntityProcessor(XML_Parser parser, processor = prologProcessor; tok = XmlPrologTok(encoding, s, end, &next); return doProlog(parser, encoding, s, end, tok, next, nextPtr, - (XML_Bool)!ps_finalBuffer); + (XML_Bool)!ps_finalBuffer, XML_TRUE); } else #endif /* XML_DTD */ @@ -4890,7 +5103,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&temp2Pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal. @@ -4937,7 +5150,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, if (!entity->textPtr) { if (enc == encoding) eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; @@ -4999,7 +5212,7 @@ storeEntityValue(XML_Parser parser, result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&tempPool); if (!entity) { /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ @@ -5207,6 +5420,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { + enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -5219,11 +5433,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc, } do { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); *eventPP = s; - } while (s != end); + } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); } else defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); @@ -5280,7 +5494,7 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) DTD * const dtd = _dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { - if (*name == XML_T(':')) { + if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { @@ -5289,7 +5503,7 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) } if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (!prefix) return 0; @@ -5298,6 +5512,7 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) else poolDiscard(&dtd->pool); elementType->prefix = prefix; + break; } } @@ -5318,7 +5533,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, return NULL; /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) return NULL; if (id->name != name) @@ -5327,23 +5542,23 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, poolFinish(&dtd->pool); if (!ns) ; - else if (name[0] == XML_T('x') - && name[1] == XML_T('m') - && name[2] == XML_T('l') - && name[3] == XML_T('n') - && name[4] == XML_T('s') - && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { + else if (name[0] == XML_T(ASCII_x) + && name[1] == XML_T(ASCII_m) + && name[2] == XML_T(ASCII_l) + && name[3] == XML_T(ASCII_n) + && name[4] == XML_T(ASCII_s) + && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { if (name[5] == XML_T('\0')) id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd->prefixes, name + 6, sizeof(PREFIX)); + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); id->xmlns = XML_TRUE; } else { int i; for (i = 0; name[i]; i++) { /* attributes without prefix are *not* in the default namespace */ - if (name[i] == XML_T(':')) { + if (name[i] == XML_T(ASCII_COLON)) { int j; for (j = 0; j < i; j++) { if (!poolAppendChar(&dtd->pool, name[j])) @@ -5351,7 +5566,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, } if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; - id->prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (id->prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); @@ -5365,7 +5580,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, return id; } -#define CONTEXT_SEP XML_T('\f') +#define CONTEXT_SEP XML_T(ASCII_FF) static const XML_Char * getContext(XML_Parser parser) @@ -5377,7 +5592,7 @@ getContext(XML_Parser parser) if (dtd->defaultPrefix.binding) { int i; int len; - if (!poolAppendChar(&tempPool, XML_T('='))) + if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) return NULL; len = dtd->defaultPrefix.binding->uriLen; if (namespaceSeparator) @@ -5403,7 +5618,7 @@ getContext(XML_Parser parser) for (s = prefix->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) return NULL; - if (!poolAppendChar(&tempPool, XML_T('='))) + if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) return NULL; len = prefix->binding->uriLen; if (namespaceSeparator) @@ -5447,7 +5662,7 @@ setContext(XML_Parser parser, const XML_Char *context) ENTITY *e; if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - e = (ENTITY *)lookup(&dtd->generalEntities, poolStart(&tempPool), 0); + e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0); if (e) e->open = XML_TRUE; if (*s != XML_T('\0')) @@ -5455,14 +5670,14 @@ setContext(XML_Parser parser, const XML_Char *context) context = s; poolDiscard(&tempPool); } - else if (*s == XML_T('=')) { + else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; if (poolLength(&tempPool) == 0) prefix = &dtd->defaultPrefix; else { if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&tempPool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool), sizeof(PREFIX)); if (!prefix) return XML_FALSE; @@ -5626,7 +5841,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) The new DTD has already been initialized. */ static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; @@ -5641,7 +5856,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) name = poolCopyString(&(newDtd->pool), oldP->name); if (!name) return 0; - if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) + if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -5663,7 +5878,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) if (!name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, + newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); if (!newA) return 0; @@ -5673,7 +5888,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldA->prefix->name, 0); } } @@ -5692,7 +5907,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, + newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); if (!newE) return 0; @@ -5706,14 +5921,14 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) } if (oldE->idAtt) newE->idAtt = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); + lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { newE->defaultAtts[i].id = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value @@ -5727,13 +5942,15 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) } /* Copy the entity tables. */ - if (!copyEntityTable(&(newDtd->generalEntities), + if (!copyEntityTable(oldParser, + &(newDtd->generalEntities), &(newDtd->pool), &(oldDtd->generalEntities))) return 0; #ifdef XML_DTD - if (!copyEntityTable(&(newDtd->paramEntities), + if (!copyEntityTable(oldParser, + &(newDtd->paramEntities), &(newDtd->pool), &(oldDtd->paramEntities))) return 0; @@ -5756,7 +5973,8 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) } /* End dtdCopy */ static int -copyEntityTable(HASH_TABLE *newTable, +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable) { @@ -5775,7 +5993,7 @@ copyEntityTable(HASH_TABLE *newTable, name = poolCopyString(newPool, oldE->name); if (!name) return 0; - newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); + newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { @@ -5832,17 +6050,17 @@ keyeq(KEY s1, KEY s2) return XML_FALSE; } -static unsigned long FASTCALL -hash(KEY s) +static size_t FASTCALL +hash(XML_Parser parser, KEY s) { - unsigned long h = 0; + size_t h = hash_secret_salt; while (*s) h = CHAR_HASH(h, *s++); return h; } static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize) +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { @@ -5859,11 +6077,11 @@ lookup(HASH_TABLE *table, KEY name, size_t createSize) return NULL; } memset(table->v, 0, tsize); - i = hash(name) & ((unsigned long)table->size - 1); + i = hash(parser, name) & ((unsigned long)table->size - 1); } else { - unsigned long h = hash(name); - unsigned long mask = (unsigned long)table->size - 1; + size_t h = hash(parser, name); + size_t mask = table->size - 1; unsigned char step = 0; i = h & mask; while (table->v[i]) { @@ -5880,7 +6098,7 @@ lookup(HASH_TABLE *table, KEY name, size_t createSize) if (table->used >> (table->power - 1)) { unsigned char newPower = table->power + 1; size_t newSize = (size_t)1 << newPower; - unsigned long newMask = (unsigned long)newSize - 1; + size_t newMask = newSize - 1; size_t tsize = newSize * sizeof(NAMED *); NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); if (!newV) @@ -5888,7 +6106,7 @@ lookup(HASH_TABLE *table, KEY name, size_t createSize) memset(newV, 0, tsize); for (i = 0; i < table->size; i++) if (table->v[i]) { - unsigned long newHash = hash(table->v[i]->name); + size_t newHash = hash(parser, table->v[i]->name); size_t j = newHash & newMask; step = 0; while (newV[j]) { @@ -6023,8 +6241,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, if (!pool->ptr && !poolGrow(pool)) return NULL; for (;;) { - XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - if (ptr == end) + const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; if (!poolGrow(pool)) return NULL; @@ -6108,13 +6326,19 @@ poolGrow(STRING_POOL *pool) } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (int)(pool->end - pool->start)*2; - pool->blocks = (BLOCK *) + BLOCK *temp; + int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); + + if (blockSize < 0) + return XML_FALSE; + + temp = (BLOCK *) pool->mem->realloc_fcn(pool->blocks, (offsetof(BLOCK, s) + blockSize * sizeof(XML_Char))); - if (pool->blocks == NULL) + if (temp == NULL) return XML_FALSE; + pool->blocks = temp; pool->blocks->size = blockSize; pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; @@ -6123,6 +6347,10 @@ poolGrow(STRING_POOL *pool) else { BLOCK *tem; int blockSize = (int)(pool->end - pool->start); + + if (blockSize < 0) + return XML_FALSE; + if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; else @@ -6263,7 +6491,7 @@ getElementType(XML_Parser parser, if (!name) return NULL; - ret = (ELEMENT_TYPE *) lookup(&dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); + ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (!ret) return NULL; if (ret->name != name) diff --git a/libxsde/xsde/c/expat/xmlrole.c b/libxsde/xsde/c/expat/xmlrole.c index 359a0d1..77f4b63 100644 --- a/libxsde/xsde/c/expat/xmlrole.c +++ b/libxsde/xsde/c/expat/xmlrole.c @@ -45,12 +45,16 @@ static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; +#ifdef XML_DTD static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; +#endif static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; +#ifdef XML_DTD static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; +#endif static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_NMTOKEN[] = { diff --git a/libxsde/xsde/c/expat/xmltok.c b/libxsde/xsde/c/expat/xmltok.c index dad812d..a4fd969 100644 --- a/libxsde/xsde/c/expat/xmltok.c +++ b/libxsde/xsde/c/expat/xmltok.c @@ -317,7 +317,9 @@ sb_charMatches(const ENCODING *enc, const char *p, int c) #endif #define PREFIX(ident) normal_ ## ident +#define XML_TOK_IMPL_C #include +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -336,11 +338,12 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval4 = 0xf0 }; -static void PTRCALL +static enum XML_Convert_Result PTRCALL utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; char *to; const char *from; @@ -348,30 +351,45 @@ utf8_toUtf8(const ENCODING *enc, if (fromLim - *fromP > toLim - *toP) { /* Avoid copying partial characters. */ + res = XML_CONVERT_OUTPUT_EXHAUSTED; for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) break; } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) + for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) *to = *from; *fromP = from; *toP = to; + + if ((to == toLim) && (from < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return res; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: + if (fromLim - from < 2) { + res = XML_CONVERT_INPUT_INCOMPLETE; + break; + } *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: + if (fromLim - from < 3) { + res = XML_CONVERT_INPUT_INCOMPLETE; + break; + } *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); from += 3; @@ -379,8 +397,14 @@ utf8_toUtf16(const ENCODING *enc, case BT_LEAD4: { unsigned long n; - if (to + 1 == toLim) + if (toLim - to < 2) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; goto after; + } + if (fromLim - from < 4) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n -= 0x10000; @@ -398,6 +422,7 @@ utf8_toUtf16(const ENCODING *enc, after: *fromP = from; *toP = to; + return res; } #ifdef XML_NS @@ -446,7 +471,7 @@ static const struct normal_encoding internal_utf8_encoding = { STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; -static void PTRCALL +static enum XML_Convert_Result PTRCALL latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) @@ -456,32 +481,37 @@ latin1_toUtf8(const ENCODING *enc, for (;;) { unsigned char c; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; } } } -static void PTRCALL +static enum XML_Convert_Result PTRCALL latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { UNUSED(enc); - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS @@ -508,15 +538,20 @@ static const struct normal_encoding latin1_encoding = { STANDARD_VTABLE(sb_) ZERO_VTABLE }; -static void PTRCALL +static enum XML_Convert_Result PTRCALL ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { UNUSED(enc); - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS @@ -563,14 +598,15 @@ unicode_byte_type(char hi, char lo) } #define DEFINE_UTF16_TO_UTF8(E) \ -static void PTRCALL \ +static enum XML_Convert_Result PTRCALL \ E ## toUtf8(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ char **toP, const char *toLim) \ { \ - const char *from; \ + const char *from = *fromP; \ UNUSED(enc); \ - for (from = *fromP; from != fromLim; from += 2) { \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ int plane; \ unsigned char lo2; \ unsigned char lo = GET_LO(from); \ @@ -580,7 +616,7 @@ E ## toUtf8(const ENCODING *enc, \ if (lo < 0x80) { \ if (*toP == toLim) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ *(*toP)++ = lo; \ break; \ @@ -590,7 +626,7 @@ E ## toUtf8(const ENCODING *enc, \ case 0x4: case 0x5: case 0x6: case 0x7: \ if (toLim - *toP < 2) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ @@ -598,7 +634,7 @@ E ## toUtf8(const ENCODING *enc, \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ @@ -608,7 +644,11 @@ E ## toUtf8(const ENCODING *enc, \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ if (toLim - *toP < 4) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (fromLim - from < 4) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ } \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ @@ -624,21 +664,33 @@ E ## toUtf8(const ENCODING *enc, \ } \ } \ *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ } #define DEFINE_UTF16_TO_UTF16(E) \ -static void PTRCALL \ +static enum XML_Convert_Result PTRCALL \ E ## toUtf16(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \ { \ - UNUSED(enc); \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ + UNUSED(enc); \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ } #define SET2(ptr, ch) \ @@ -725,7 +777,9 @@ little2_isNmstrtMin(const ENCODING *enc, const char *p) #define IS_NMSTRT_CHAR(enc, p, n) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) +#define XML_TOK_IMPL_C #include +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -864,7 +918,9 @@ big2_isNmstrtMin(const ENCODING *enc, const char *p) #define IS_NMSTRT_CHAR(enc, p, n) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) +#define XML_TOK_IMPL_C #include +#undef XML_TOK_IMPL_C #undef MINBPC #undef BYTE_TYPE @@ -1314,7 +1370,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p) return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) @@ -1325,21 +1381,21 @@ unknown_toUtf8(const ENCODING *enc, const char *utf8; int n; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); } else { if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; } do { @@ -1348,13 +1404,13 @@ unknown_toUtf8(const ENCODING *enc, } } -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); - while (*fromP != fromLim && *toP != toLim) { + while (*fromP < fromLim && *toP < toLim) { unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { c = (unsigned short) @@ -1366,6 +1422,11 @@ unknown_toUtf16(const ENCODING *enc, (*fromP)++; *(*toP)++ = c; } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } ENCODING * @@ -1529,7 +1590,7 @@ initScan(const ENCODING * const *encodingTable, { const ENCODING **encPtr; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; encPtr = enc->encPtr; if (ptr + 1 == end) { @@ -1644,7 +1705,9 @@ initScan(const ENCODING * const *encodingTable, #define NS(x) x #define ns(x) x +#define XML_TOK_NS_C #include +#undef XML_TOK_NS_C #undef NS #undef ns @@ -1653,7 +1716,9 @@ initScan(const ENCODING * const *encodingTable, #define NS(x) x ## NS #define ns(x) x ## _ns +#define XML_TOK_NS_C #include +#undef XML_TOK_NS_C #undef NS #undef ns diff --git a/libxsde/xsde/c/expat/xmltok.h b/libxsde/xsde/c/expat/xmltok.h index ca867aa..752007e 100644 --- a/libxsde/xsde/c/expat/xmltok.h +++ b/libxsde/xsde/c/expat/xmltok.h @@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *, const char *, const char **); +enum XML_Convert_Result { + XML_CONVERT_COMPLETED = 0, + XML_CONVERT_INPUT_INCOMPLETE = 1, + XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */ +}; + struct encoding { SCANNER scanners[XML_N_STATES]; SCANNER literalScanners[XML_N_LITERAL_TYPES]; @@ -158,12 +164,12 @@ struct encoding { const char *ptr, const char *end, const char **badPtr); - void (PTRCALL *utf8Convert)(const ENCODING *enc, + enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim); - void (PTRCALL *utf16Convert)(const ENCODING *enc, + enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, diff --git a/libxsde/xsde/c/expat/xmltok_impl.c b/libxsde/xsde/c/expat/xmltok_impl.c index b0d23c8..5410278 100644 --- a/libxsde/xsde/c/expat/xmltok_impl.c +++ b/libxsde/xsde/c/expat/xmltok_impl.c @@ -2,6 +2,9 @@ See the file COPYING for copying permission. */ +/* This file is included! */ +#ifdef XML_TOK_IMPL_C + #ifndef IS_INVALID_CHAR #define IS_INVALID_CHAR(enc, ptr, n) (0) #endif @@ -90,13 +93,13 @@ static int PTRCALL PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (ptr < end) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: @@ -144,7 +147,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: if (ptr + MINBPC(enc) == end) @@ -233,7 +236,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -242,7 +245,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: @@ -308,7 +311,7 @@ static int PTRCALL PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -351,7 +354,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -394,11 +397,11 @@ PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; @@ -435,7 +438,7 @@ static int PTRCALL PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -444,7 +447,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -467,7 +470,7 @@ static int PTRCALL PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (ptr < end) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { @@ -477,7 +480,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; @@ -509,7 +512,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -532,7 +535,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon = 0; #endif - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -719,7 +722,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, hadColon = 0; #endif /* we have a start-tag */ - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -743,7 +746,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: @@ -788,7 +791,7 @@ static int PTRCALL PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -835,7 +838,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -888,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) - return -XML_TOK_PERCENT; + return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: @@ -898,7 +901,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -924,7 +927,7 @@ PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: @@ -944,7 +947,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - while (ptr != end) { + while (ptr < end) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) @@ -976,7 +979,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -1144,7 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: @@ -1207,10 +1210,10 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; start = ptr; - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1265,10 +1268,10 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; start = ptr; - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1329,7 +1332,7 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_LT: @@ -1376,7 +1379,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; ptr < end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1754,7 +1757,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *end, POSITION *pos) { - while (ptr != end) { + while (ptr < end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -1770,7 +1773,7 @@ PREFIX(updatePosition)(const ENCODING *enc, case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (ptr < end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (XML_Size)-1; break; @@ -1790,3 +1793,4 @@ PREFIX(updatePosition)(const ENCODING *enc, #undef CHECK_NMSTRT_CASE #undef CHECK_NMSTRT_CASES +#endif /* XML_TOK_IMPL_C */ diff --git a/libxsde/xsde/c/expat/xmltok_ns.c b/libxsde/xsde/c/expat/xmltok_ns.c index d2f8938..c3b88fd 100644 --- a/libxsde/xsde/c/expat/xmltok_ns.c +++ b/libxsde/xsde/c/expat/xmltok_ns.c @@ -1,3 +1,10 @@ +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. +*/ + +/* This file is included! */ +#ifdef XML_TOK_NS_C + const ENCODING * NS(XmlGetUtf8InternalEncoding)(void) { @@ -104,3 +111,5 @@ NS(XmlParseXmlDecl)(int isGeneralTextEntity, encoding, standalone); } + +#endif /* XML_TOK_NS_C */ -- cgit v1.1