summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2017-05-12 14:57:13 +0200
committerAlexandru Croitor <alexandru.croitor@qt.io>2017-07-11 13:20:21 +0000
commitf7424695068da4ad052e5dc5587e76a0a356957c (patch)
tree66cf445733bda8ae7c537ee5192da61a18c90a21
parent77aea3903db87a72b6bea38e929bd43dc1528cef (diff)
[Backport] Update expat to 2.2.0 to fix CVE vulnerability.
Security fixes: CVE-2016-0718 -- Fix crash on malformed input CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 / CVE-2015-2716 introduced with Expat 2.1.1 CVE-2016-5300 -- Use more entropy for hash initialization than the original fix to CVE-2012-0876 CVE-2012-6702 -- Resolve troublesome internal call to srand that was introduced with Expat 2.1.0 when addressing CVE-2012-0876 (issue #496) BUG=703537 Review-Url: https://codereview.chromium.org/2761253002 Cr-Commit-Position: refs/heads/master@{#459025} (cherry picked from commit 33a5703a620ec246ee08214e6c880068b6e9d687) Change-Id: I13fe28e71dc239914ddd425cc6e8c17771d07a39 Review-Url: https://codereview.chromium.org/2781503002 . Cr-Commit-Position: refs/branch-heads/3029@{#425} Cr-Branched-From: 939b32ee5ba05c396eef3fd992822fcca9a2e262-refs/heads/master@{#454471} Reviewed-by: Michael Brüning <michael.bruning@qt.io> Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r--chromium/third_party/expat/README.chromium38
-rw-r--r--chromium/third_party/expat/files/COPYING5
-rw-r--r--chromium/third_party/expat/files/Changes66
-rw-r--r--chromium/third_party/expat/files/MANIFEST4
-rw-r--r--chromium/third_party/expat/files/README4
-rw-r--r--chromium/third_party/expat/files/lib/amigaconfig.h3
-rw-r--r--chromium/third_party/expat/files/lib/expat.h13
-rw-r--r--chromium/third_party/expat/files/lib/expat_config.h15
-rw-r--r--chromium/third_party/expat/files/lib/expat_external.h15
-rw-r--r--chromium/third_party/expat/files/lib/internal.h22
-rw-r--r--chromium/third_party/expat/files/lib/libexpat.def3
-rw-r--r--chromium/third_party/expat/files/lib/libexpatw.def3
-rw-r--r--chromium/third_party/expat/files/lib/xmlparse.c133
-rw-r--r--chromium/third_party/expat/files/lib/xmlparse.c.original127
-rw-r--r--chromium/third_party/expat/files/lib/xmlrole.c224
-rw-r--r--chromium/third_party/expat/files/lib/xmltok.c232
-rw-r--r--chromium/third_party/expat/files/lib/xmltok.h10
-rw-r--r--chromium/third_party/expat/files/lib/xmltok_impl.c226
-rw-r--r--chromium/third_party/expat/files/lib/xmltok_impl.c.original1783
19 files changed, 743 insertions, 2183 deletions
diff --git a/chromium/third_party/expat/README.chromium b/chromium/third_party/expat/README.chromium
index a0af1e2d310..e3dda3aea97 100644
--- a/chromium/third_party/expat/README.chromium
+++ b/chromium/third_party/expat/README.chromium
@@ -1,7 +1,7 @@
Name: Expat XML Parser
Short Name: expat
URL: http://sourceforge.net/projects/expat/
-Version: 2.1.0
+Version: 2.2.0
License: MIT
License File: files/COPYING
Security Critical: yes
@@ -21,25 +21,43 @@ Local Modifications:
conftools/*
doc/*
examples/*
+ m4/*
tests/*
vms/*
win32/*
xmlwf/*
+ aclocal.m4
+ CMake.README
+ CMakeLists.txt
configure
- configure.in
+ configure.ac
+ configureChecks.cmake
Makefile.in
expat.dsw
- expat.dsw
+ expat.pc.in
+ expat_config.h.cmake
expat_config.h.in
+ Makefile.in
Edited:
lib/winconfig.h (see winconfig.h.original for unmodified version)
- * Added check on line 1751 of xmltok_impl.c to patch a
- bug with the handling of utf-8 characters that leads to a crash.
- lib/xmltok_impl.c (see xmltok_imp.c.original for unmodified version)
- * Prevent a compiler warning when compiling with
- WIN32_LEAN_AND_MEAN predefined.
lib/xmlparse.c (see xmlparse.c.original for unmodified version)
- * Apply https://hg.mozilla.org/releases/mozilla-esr31/rev/2f3e78643f5c
- to prevent an integer overflow.
+ * Added line 713 of xmlparse.c to suppress compiling error.
+ * Apply expat patch, Fix double free error.
+ https://github.com/libexpat/libexpat/commit/7ae9c3d3af433cd4defe95234eae7dc8ed15637f
+ * Apply expat patch. expat 2.2.0 fixed CVE-2016-0718 but cause
+ other regression. expat's issue number is #539.
+ https://github.com/libexpat/libexpat/commit/af507cef2c93cb8d40062a0abe43a4f4e9158fb2
+ https://sourceforge.net/p/expat/bugs/539/
+ lib/xmltok.c (see xmltok.c.original for unmodified version)
+ Also expat issue #539.
+ https://github.com/libexpat/libexpat/commit/896b6c1fd3b842f377d1b62135dccf0a579cf65d
+ lib/expat_external.h(see expat_external.h for unmodified version)
+ * Disallow adding symbol visibility attribute automatically to
+ function for static linked library.
Added files:
lib/expat_config.h (a generated config file)
+
+ Old CVE-2015-1283 fix for expat 2.1.0 from mozilla hg
+ https://hg.mozilla.org/releases/mozilla-esr31/rev/2f3e78643f5c
+ is included in expat 2.2.0.
+ https://github.com/libexpat/libexpat/commit/ba0f9c3b40c264b8dd392e02a7a060a8fa54f032
diff --git a/chromium/third_party/expat/files/COPYING b/chromium/third_party/expat/files/COPYING
index dcb45064296..092c83baee1 100644
--- a/chromium/third_party/expat/files/COPYING
+++ b/chromium/third_party/expat/files/COPYING
@@ -1,6 +1,5 @@
-Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
- and Clark Cooper
-Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
+Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
+Copyright (c) 2001-2016 Expat maintainers
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/chromium/third_party/expat/files/Changes b/chromium/third_party/expat/files/Changes
index 08897b9f9ed..583c8685762 100644
--- a/chromium/third_party/expat/files/Changes
+++ b/chromium/third_party/expat/files/Changes
@@ -1,3 +1,67 @@
+Release 2.2.0 Tue June 21 2016
+ Security fixes:
+ #537 CVE-2016-0718 -- Fix crash on malformed input
+ CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 /
+ CVE-2015-2716 introduced with Expat 2.1.1
+ #499 CVE-2016-5300 -- Use more entropy for hash initialization
+ than the original fix to CVE-2012-0876
+ #519 CVE-2012-6702 -- Resolve troublesome internal call to srand
+ that was introduced with Expat 2.1.0
+ when addressing CVE-2012-0876 (issue #496)
+
+ Bug fixes:
+ Fix uninitialized reads of size 1
+ (e.g. in little2_updatePosition)
+ Fix detection of UTF-8 character boundaries
+
+ Other changes:
+ #532 Fix compilation for Visual Studio 2010 (keyword "C99")
+ Autotools: Resolve use of "$<" to better support bmake
+ Autotools: Add QA script "qa.sh" (and make target "qa")
+ Autotools: Respect CXXFLAGS if given
+ Autotools: Fix "make run-xmltest"
+ Autotools: Have "make run-xmltest" check for expected output
+ p90 CMake: Fix static build (BUILD_shared=OFF) on Windows
+ #536 CMake: Add soversion, support -DNO_SONAME=yes to bypass
+ #323 CMake: Add suffix "d" to differentiate debug from release
+ CMake: Define WIN32 with CMake on Windows
+ Annotate memory allocators for GCC
+ Address all currently known compile warnings
+ Make sure that API symbols remain visible despite
+ -fvisibility=hidden
+ Remove executable flag from source files
+ Resolve COMPILED_FROM_DSP in favor of WIN32
+
+ Special thanks to:
+ Björn Lindahl
+ Christian Heimes
+ Cristian Rodríguez
+ Daniel Krügler
+ Gustavo Grieco
+ Karl Waclawek
+ László Böszörményi
+ Marco Grassi
+ Pascal Cuoq
+ Sergei Nikulov
+ Thomas Beutlich
+ Warren Young
+ Yann Droneaud
+
+Release 2.1.1 Sat March 12 2016
+ Security fixes:
+ #582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer
+
+ Bug fixes:
+ #502: Fix potential null pointer dereference
+ #520: Symbol XML_SetHashSalt was not exported
+ Output of "xmlwf -h" was incomplete
+
+ Other changes:
+ #503: Document behavior of calling XML_SetHashSalt with salt 0
+ Minor improvements to man page xmlwf(1)
+ Improvements to the experimental CMake build system
+ libtool now invoked with --verbose
+
Release 2.1.0 Sat March 24 2012
- Bug Fixes:
#1742315: Harmful XML_ParserCreateNS suggestion.
@@ -23,7 +87,7 @@ Release 2.1.0 Sat March 24 2012
#3312568: CMake support.
#3446384: Report byte offsets for attr names and values.
- New Features / API changes:
- Added new API member XML_SetHashSalt() that allows setting an intial
+ Added new API member XML_SetHashSalt() that allows setting an initial
value (salt) for hash calculations. This is part of the fix for
bug #3496608 to randomize hash parameters.
When compiled with XML_ATTR_INFO defined, adds new API member
diff --git a/chromium/third_party/expat/files/MANIFEST b/chromium/third_party/expat/files/MANIFEST
index 7a020dc05b0..afb9acacdf0 100644
--- a/chromium/third_party/expat/files/MANIFEST
+++ b/chromium/third_party/expat/files/MANIFEST
@@ -44,7 +44,7 @@ doc/reference.html
doc/style.css
doc/valid-xhtml10.png
doc/xmlwf.1
-doc/xmlwf.sgml
+doc/xmlwf.xml
CMakeLists.txt
CMake.README
COPYING
@@ -54,7 +54,7 @@ MANIFEST
Makefile.in
README
configure
-configure.in
+configure.ac
expat_config.h.in
expat_config.h.cmake
expat.pc.in
diff --git a/chromium/third_party/expat/files/README b/chromium/third_party/expat/files/README
index 1f88467d1b0..a7d28450d54 100644
--- a/chromium/third_party/expat/files/README
+++ b/chromium/third_party/expat/files/README
@@ -1,5 +1,5 @@
- Expat, Release 2.1.0
+ Expat, Release 2.2.0
This is Expat, a C library for parsing XML, written by James Clark.
Expat is a stream-oriented XML parser. This means that you register
@@ -114,7 +114,7 @@ Note for Solaris users: The "ar" command is usually located in
"/usr/ccs/bin", which is not in the default PATH. You will need to
add this to your path for the "make" command, and probably also switch
to GNU make (the "make" found in /usr/ccs/bin does not seem to work
-properly -- appearantly it does not understand .PHONY directives). If
+properly -- apparently it does not understand .PHONY directives). If
you're using ksh or bash, use this command to build:
PATH=/usr/ccs/bin:$PATH make
diff --git a/chromium/third_party/expat/files/lib/amigaconfig.h b/chromium/third_party/expat/files/lib/amigaconfig.h
index 86c61150402..49c92c70149 100644
--- a/chromium/third_party/expat/files/lib/amigaconfig.h
+++ b/chromium/third_party/expat/files/lib/amigaconfig.h
@@ -7,9 +7,6 @@
/* Define to 1 if you have the `bcopy' function. */
#define HAVE_BCOPY 1
-/* Define to 1 if you have the <check.h> header file. */
-#undef HAVE_CHECK_H
-
/* Define to 1 if you have the `memmove' function. */
#define HAVE_MEMMOVE 1
diff --git a/chromium/third_party/expat/files/lib/expat.h b/chromium/third_party/expat/files/lib/expat.h
index 9a21680be46..086e24b39c5 100644
--- a/chromium/third_party/expat/files/lib/expat.h
+++ b/chromium/third_party/expat/files/lib/expat.h
@@ -342,7 +342,7 @@ XML_SetEntityDeclHandler(XML_Parser parser,
XML_EntityDeclHandler handler);
/* OBSOLETE -- OBSOLETE -- OBSOLETE
- This handler has been superceded by the EntityDeclHandler above.
+ This handler has been superseded by the EntityDeclHandler above.
It is provided here for backward compatibility.
This is called for a declaration of an unparsed (NDATA) entity.
@@ -973,9 +973,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model);
/* Exposing the memory handling functions used in Expat */
XMLPARSEAPI(void *)
+XML_ATTR_MALLOC
+XML_ATTR_ALLOC_SIZE(2)
XML_MemMalloc(XML_Parser parser, size_t size);
XMLPARSEAPI(void *)
+XML_ATTR_ALLOC_SIZE(3)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
XMLPARSEAPI(void)
@@ -1031,13 +1034,11 @@ XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
-/* Expat follows the GNU/Linux convention of odd number minor version for
- beta/development releases and even number minor version for stable
- releases. Micro is bumped with each release, and set to 0 with each
- change to major or minor version.
+/* Expat follows the semantic versioning convention.
+ See http://semver.org.
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 1
+#define XML_MINOR_VERSION 2
#define XML_MICRO_VERSION 0
#ifdef __cplusplus
diff --git a/chromium/third_party/expat/files/lib/expat_config.h b/chromium/third_party/expat/files/lib/expat_config.h
index d5971ef7241..1eaa22aee50 100644
--- a/chromium/third_party/expat/files/lib/expat_config.h
+++ b/chromium/third_party/expat/files/lib/expat_config.h
@@ -1,10 +1,10 @@
// No copyright notice; this file based on autogenerated header
-#ifndef THIRD_PARTY_EXPAT_V2_0_1_SOURCE_LIB_EXPAT_CONFIG_H__
-#define THIRD_PARTY_EXPAT_V2_0_1_SOURCE_LIB_EXPAT_CONFIG_H__
+#ifndef THIRD_PARTY_EXPAT_V2_2_0_SOURCE_LIB_EXPAT_CONFIG_H__
+#define THIRD_PARTY_EXPAT_V2_2_0_SOURCE_LIB_EXPAT_CONFIG_H__
/* expat_config.h. Generated from expat_config.h.in by configure. */
-/* expat_config.h.in. Generated from configure.in by autoheader. */
+/* expat_config.h.in. Generated from configure.ac by autoheader. */
/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
#define BYTEORDER 1234
@@ -57,8 +57,7 @@
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
- */
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"
/* Define to the address where bug reports for this package should be sent. */
@@ -68,7 +67,7 @@
#define PACKAGE_NAME "expat"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "expat 2.1.0"
+#define PACKAGE_STRING "expat 2.2.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "expat"
@@ -77,7 +76,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "2.1.0"
+#define PACKAGE_VERSION "2.2.0"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
@@ -107,4 +106,4 @@
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
-#endif // THIRD_PARTY_EXPAT_V2_0_1_SOURCE_LIB_EXPAT_CONFIG_H__
+#endif // THIRD_PARTY_EXPAT_V2_2_0_SOURCE_LIB_EXPAT_CONFIG_H__
diff --git a/chromium/third_party/expat/files/lib/expat_external.h b/chromium/third_party/expat/files/lib/expat_external.h
index 2c03284ea26..124c7777f14 100644
--- a/chromium/third_party/expat/files/lib/expat_external.h
+++ b/chromium/third_party/expat/files/lib/expat_external.h
@@ -65,12 +65,27 @@
#endif
#endif /* not defined XML_STATIC */
+/* Don't change symbol visibility if used as a static link libraray */
+#if !defined(XML_STATIC) && !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
+#define XMLIMPORT __attribute__ ((visibility ("default")))
+#endif
/* If we didn't define it above, define it away: */
#ifndef XMLIMPORT
#define XMLIMPORT
#endif
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define XML_ATTR_MALLOC __attribute__((__malloc__))
+#else
+#define XML_ATTR_MALLOC
+#endif
+
+#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))
+#else
+#define XML_ATTR_ALLOC_SIZE(x)
+#endif
#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
diff --git a/chromium/third_party/expat/files/lib/internal.h b/chromium/third_party/expat/files/lib/internal.h
index dd5454831da..94cb98e15ca 100644
--- a/chromium/third_party/expat/files/lib/internal.h
+++ b/chromium/third_party/expat/files/lib/internal.h
@@ -71,3 +71,25 @@
#define inline
#endif
#endif
+
+#ifndef UNUSED_P
+# ifdef __GNUC__
+# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
+# else
+# define UNUSED_P(p) UNUSED_ ## p
+# endif
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/chromium/third_party/expat/files/lib/libexpat.def b/chromium/third_party/expat/files/lib/libexpat.def
index 3920bbcf901..e4f1afc8333 100644
--- a/chromium/third_party/expat/files/lib/libexpat.def
+++ b/chromium/third_party/expat/files/lib/libexpat.def
@@ -71,3 +71,6 @@ EXPORTS
XML_StopParser @63
XML_ResumeParser @64
XML_GetParsingStatus @65
+; added with version 2.1.1
+; XML_GetAttributeInfo @66
+ XML_SetHashSalt @66
diff --git a/chromium/third_party/expat/files/lib/libexpatw.def b/chromium/third_party/expat/files/lib/libexpatw.def
index 3920bbcf901..e4f1afc8333 100644
--- a/chromium/third_party/expat/files/lib/libexpatw.def
+++ b/chromium/third_party/expat/files/lib/libexpatw.def
@@ -71,3 +71,6 @@ EXPORTS
XML_StopParser @63
XML_ResumeParser @64
XML_GetParsingStatus @65
+; added with version 2.1.1
+; XML_GetAttributeInfo @66
+ XML_SetHashSalt @66
diff --git a/chromium/third_party/expat/files/lib/xmlparse.c b/chromium/third_party/expat/files/lib/xmlparse.c
index ede7b5bb667..72b39fced49 100644
--- a/chromium/third_party/expat/files/lib/xmlparse.c
+++ b/chromium/third_party/expat/files/lib/xmlparse.c
@@ -6,11 +6,18 @@
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
-#include <time.h> /* time() */
+
+#ifdef WIN32
+#define getpid GetCurrentProcessId
+#else
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/types.h> /* getpid() */
+#include <unistd.h> /* getpid() */
+#endif
#define XML_BUILDING_EXPAT 1
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -20,7 +27,7 @@
#include "watcomconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H)
#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "ascii.h"
#include "expat.h"
@@ -432,7 +439,7 @@ static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end);
-static unsigned long generate_hash_secret_salt(void);
+static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser);
static XML_Parser
@@ -691,11 +698,50 @@ static const XML_Char implicitContext[] = {
};
static unsigned long
-generate_hash_secret_salt(void)
+gather_time_entropy(void)
{
- unsigned int seed = time(NULL) % UINT_MAX;
- srand(seed);
- return rand();
+#ifdef WIN32
+ FILETIME ft;
+ GetSystemTimeAsFileTime(&ft); /* never fails */
+ return ft.dwHighDateTime ^ ft.dwLowDateTime;
+#else
+ struct timeval tv;
+ int gettimeofday_res;
+
+ gettimeofday_res = gettimeofday(&tv, NULL);
+ assert (gettimeofday_res == 0);
+ (void)gettimeofday_res;
+
+ /* Microseconds time is <20 bits entropy */
+ return tv.tv_usec;
+#endif
+}
+
+static unsigned long
+generate_hash_secret_salt(XML_Parser parser)
+{
+#if defined(__UINTPTR_TYPE__)
+# define PARSER_CAST(p) (__UINTPTR_TYPE__)(p)
+#elif defined(_WIN64) && defined(_MSC_VER)
+# define PARSER_CAST(p) (unsigned __int64)(p)
+#else
+# define PARSER_CAST(p) (p)
+#endif
+
+ /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
+ unsigned long small_factory = 2147483647ul;
+ unsigned long long big_factory = 2305843009213693951ull;
+
+ /* Process ID is 0 bits entropy if attacker has local access
+ * XML_Parser address is few bits of entropy if attacker has local access */
+ const unsigned long entropy =
+ gather_time_entropy() ^ getpid() ^ (unsigned long)PARSER_CAST(parser);
+
+ if (sizeof(unsigned long) == 4) {
+ return entropy * small_factory;
+ } else {
+ return entropy * (unsigned long)big_factory;
+ }
}
static XML_Bool /* only valid for root parser */
@@ -703,7 +749,7 @@ startParsing(XML_Parser parser)
{
/* hash functions must be initialized before setContext() is called */
if (hash_secret_salt == 0)
- hash_secret_salt = generate_hash_secret_salt();
+ hash_secret_salt = generate_hash_secret_salt(parser);
if (ns) {
/* implicit context only set for root parser, since child
parsers (i.e. external entity parsers) will inherit it
@@ -1550,7 +1596,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
else if (bufferPtr == bufferEnd) {
const char *end;
int nLeftOver;
- enum XML_Error result;
+ enum XML_Status result;
parseEndByteIndex += len;
positionPtr = s;
ps_finalBuffer = (XML_Bool)isFinal;
@@ -1678,12 +1724,10 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
void * XMLCALL
XML_GetBuffer(XML_Parser parser, int len)
{
-/* BEGIN MOZILLA CHANGE (sanity check len) */
if (len < 0) {
errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
-/* END MOZILLA CHANGE */
switch (ps_parsing) {
case XML_SUSPENDED:
errorCode = XML_ERROR_SUSPENDED;
@@ -1695,16 +1739,17 @@ XML_GetBuffer(XML_Parser parser, int len)
}
if (len > bufferLim - bufferEnd) {
- int neededSize = len + (int)(bufferEnd - bufferPtr);
-/* BEGIN MOZILLA CHANGE (sanity check neededSize) */
+#ifdef XML_CONTEXT_BYTES
+ int keep;
+#endif /* defined XML_CONTEXT_BYTES */
+ /* Do not invoke signed arithmetic overflow: */
+ int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
if (neededSize < 0) {
errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
-/* END MOZILLA CHANGE */
#ifdef XML_CONTEXT_BYTES
- int keep = (int)(bufferPtr - buffer);
-
+ keep = (int)(bufferPtr - buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
neededSize += keep;
@@ -1729,16 +1774,13 @@ XML_GetBuffer(XML_Parser parser, int len)
if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE;
do {
- bufferSize *= 2;
-/* BEGIN MOZILLA CHANGE (prevent infinite loop on overflow) */
+ /* Do not invoke signed arithmetic overflow: */
+ bufferSize = (int) (2U * (unsigned) bufferSize);
} while (bufferSize < neededSize && bufferSize > 0);
-/* END MOZILLA CHANGE */
-/* BEGIN MOZILLA CHANGE (sanity check bufferSize) */
if (bufferSize <= 0) {
errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
-/* END MOZILLA CHANGE */
newBuf = (char *)MALLOC(bufferSize);
if (newBuf == 0) {
errorCode = XML_ERROR_NO_MEMORY;
@@ -1860,7 +1902,7 @@ XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)
{
if (eventPtr)
- return parseEndByteIndex - (parseEndPtr - eventPtr);
+ return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
return -1;
}
@@ -2434,11 +2476,11 @@ doContent(XML_Parser parser,
for (;;) {
int bufSize;
int convLen;
- XmlConvert(enc,
+ const enum XML_Convert_Result convert_res = XmlConvert(enc,
&fromPtr, rawNameEnd,
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
convLen = (int)(toPtr - (XML_Char *)tag->buf);
- if (fromPtr == rawNameEnd) {
+ if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
tag->name.strLen = convLen;
break;
}
@@ -2659,11 +2701,11 @@ doContent(XML_Parser parser,
if (MUST_CONVERT(enc, s)) {
for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s;
charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf));
- if (s == next)
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
*eventPP = s;
}
@@ -2930,6 +2972,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
unsigned long uriHash = hash_secret_salt;
((XML_Char *)s)[-1] = 0; /* clear flag */
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
+ if (!id || !id->prefix)
+ return XML_ERROR_NO_MEMORY;
b = id->prefix->binding;
if (!b)
return XML_ERROR_UNBOUND_PREFIX;
@@ -3267,11 +3311,11 @@ doCdataSection(XML_Parser parser,
if (MUST_CONVERT(enc, s)) {
for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = next;
charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf));
- if (s == next)
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
*eventPP = s;
}
@@ -4930,9 +4974,9 @@ internalEntityProcessor(XML_Parser parser,
static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,
- const char *s,
- const char *end,
- const char **nextPtr)
+ const char *UNUSED_P(s),
+ const char *UNUSED_P(end),
+ const char **UNUSED_P(nextPtr))
{
return errorCode;
}
@@ -5348,6 +5392,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end)
{
if (MUST_CONVERT(enc, s)) {
+ enum XML_Convert_Result convert_res;
const char **eventPP;
const char **eventEndPP;
if (enc == encoding) {
@@ -5360,11 +5405,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
}
do {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
+ convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s;
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
*eventPP = s;
- } while (s != end);
+ } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
}
else
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
@@ -5494,6 +5539,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc,
return NULL;
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
sizeof(PREFIX));
+ if (!id->prefix)
+ return NULL;
if (id->prefix->name == poolStart(&dtd->pool))
poolFinish(&dtd->pool);
else
@@ -5841,7 +5888,6 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H
newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
if (!newE->defaultAtts) {
- ms->free_fcn(newE);
return 0;
}
}
@@ -6167,8 +6213,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
if (!pool->ptr && !poolGrow(pool))
return NULL;
for (;;) {
- XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
- if (ptr == end)
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
if (!poolGrow(pool))
return NULL;
@@ -6252,8 +6298,13 @@ poolGrow(STRING_POOL *pool)
}
}
if (pool->blocks && pool->start == pool->blocks->s) {
- int blockSize = (int)(pool->end - pool->start)*2;
- BLOCK *temp = (BLOCK *)
+ BLOCK *temp;
+ int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
+
+ if (blockSize < 0)
+ return XML_FALSE;
+
+ temp = (BLOCK *)
pool->mem->realloc_fcn(pool->blocks,
(offsetof(BLOCK, s)
+ blockSize * sizeof(XML_Char)));
@@ -6268,6 +6319,10 @@ poolGrow(STRING_POOL *pool)
else {
BLOCK *tem;
int blockSize = (int)(pool->end - pool->start);
+
+ if (blockSize < 0)
+ return XML_FALSE;
+
if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE;
else
diff --git a/chromium/third_party/expat/files/lib/xmlparse.c.original b/chromium/third_party/expat/files/lib/xmlparse.c.original
index f35aa36ba8a..b308e67e2d3 100644
--- a/chromium/third_party/expat/files/lib/xmlparse.c.original
+++ b/chromium/third_party/expat/files/lib/xmlparse.c.original
@@ -6,11 +6,18 @@
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
-#include <time.h> /* time() */
+
+#ifdef WIN32
+#define getpid GetCurrentProcessId
+#else
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/types.h> /* getpid() */
+#include <unistd.h> /* getpid() */
+#endif
#define XML_BUILDING_EXPAT 1
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -20,7 +27,7 @@
#include "watcomconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H)
#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "ascii.h"
#include "expat.h"
@@ -432,7 +439,7 @@ static ELEMENT_TYPE *
getElementType(XML_Parser parser, const ENCODING *enc,
const char *ptr, const char *end);
-static unsigned long generate_hash_secret_salt(void);
+static unsigned long generate_hash_secret_salt(XML_Parser parser);
static XML_Bool startParsing(XML_Parser parser);
static XML_Parser
@@ -691,11 +698,38 @@ static const XML_Char implicitContext[] = {
};
static unsigned long
-generate_hash_secret_salt(void)
+gather_time_entropy(void)
{
- unsigned int seed = time(NULL) % UINT_MAX;
- srand(seed);
- return rand();
+#ifdef WIN32
+ FILETIME ft;
+ GetSystemTimeAsFileTime(&ft); /* never fails */
+ return ft.dwHighDateTime ^ ft.dwLowDateTime;
+#else
+ struct timeval tv;
+ int gettimeofday_res;
+
+ gettimeofday_res = gettimeofday(&tv, NULL);
+ assert (gettimeofday_res == 0);
+
+ /* Microseconds time is <20 bits entropy */
+ return tv.tv_usec;
+#endif
+}
+
+static unsigned long
+generate_hash_secret_salt(XML_Parser parser)
+{
+ /* Process ID is 0 bits entropy if attacker has local access
+ * XML_Parser address is few bits of entropy if attacker has local access */
+ const unsigned long entropy =
+ gather_time_entropy() ^ getpid() ^ (unsigned long)parser;
+
+ /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
+ if (sizeof(unsigned long) == 4) {
+ return entropy * 2147483647;
+ } else {
+ return entropy * (unsigned long)2305843009213693951;
+ }
}
static XML_Bool /* only valid for root parser */
@@ -703,7 +737,7 @@ startParsing(XML_Parser parser)
{
/* hash functions must be initialized before setContext() is called */
if (hash_secret_salt == 0)
- hash_secret_salt = generate_hash_secret_salt();
+ hash_secret_salt = generate_hash_secret_salt(parser);
if (ns) {
/* implicit context only set for root parser, since child
parsers (i.e. external entity parsers) will inherit it
@@ -1550,7 +1584,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
else if (bufferPtr == bufferEnd) {
const char *end;
int nLeftOver;
- enum XML_Error result;
+ enum XML_Status result;
parseEndByteIndex += len;
positionPtr = s;
ps_finalBuffer = (XML_Bool)isFinal;
@@ -1678,6 +1712,10 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
void * XMLCALL
XML_GetBuffer(XML_Parser parser, int len)
{
+ if (len < 0) {
+ errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
switch (ps_parsing) {
case XML_SUSPENDED:
errorCode = XML_ERROR_SUSPENDED;
@@ -1689,11 +1727,17 @@ XML_GetBuffer(XML_Parser parser, int len)
}
if (len > bufferLim - bufferEnd) {
- /* FIXME avoid integer overflow */
- int neededSize = len + (int)(bufferEnd - bufferPtr);
#ifdef XML_CONTEXT_BYTES
- int keep = (int)(bufferPtr - buffer);
-
+ int keep;
+#endif /* defined XML_CONTEXT_BYTES */
+ /* Do not invoke signed arithmetic overflow: */
+ int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
+ if (neededSize < 0) {
+ errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
+#ifdef XML_CONTEXT_BYTES
+ keep = (int)(bufferPtr - buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
neededSize += keep;
@@ -1718,8 +1762,13 @@ XML_GetBuffer(XML_Parser parser, int len)
if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE;
do {
- bufferSize *= 2;
- } while (bufferSize < neededSize);
+ /* Do not invoke signed arithmetic overflow: */
+ bufferSize = (int) (2U * (unsigned) bufferSize);
+ } while (bufferSize < neededSize && bufferSize > 0);
+ if (bufferSize <= 0) {
+ errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
newBuf = (char *)MALLOC(bufferSize);
if (newBuf == 0) {
errorCode = XML_ERROR_NO_MEMORY;
@@ -1841,7 +1890,7 @@ XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)
{
if (eventPtr)
- return parseEndByteIndex - (parseEndPtr - eventPtr);
+ return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
return -1;
}
@@ -2415,11 +2464,11 @@ doContent(XML_Parser parser,
for (;;) {
int bufSize;
int convLen;
- XmlConvert(enc,
+ const enum XML_Convert_Result convert_res = XmlConvert(enc,
&fromPtr, rawNameEnd,
(ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
convLen = (int)(toPtr - (XML_Char *)tag->buf);
- if (fromPtr == rawNameEnd) {
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
tag->name.strLen = convLen;
break;
}
@@ -2640,11 +2689,11 @@ doContent(XML_Parser parser,
if (MUST_CONVERT(enc, s)) {
for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s;
charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf));
- if (s == next)
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
*eventPP = s;
}
@@ -2911,6 +2960,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
unsigned long uriHash = hash_secret_salt;
((XML_Char *)s)[-1] = 0; /* clear flag */
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
+ if (!id || !id->prefix)
+ return XML_ERROR_NO_MEMORY;
b = id->prefix->binding;
if (!b)
return XML_ERROR_UNBOUND_PREFIX;
@@ -3248,11 +3299,11 @@ doCdataSection(XML_Parser parser,
if (MUST_CONVERT(enc, s)) {
for (;;) {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = next;
charDataHandler(handlerArg, dataBuf,
(int)(dataPtr - (ICHAR *)dataBuf));
- if (s == next)
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
*eventPP = s;
}
@@ -4911,9 +4962,9 @@ internalEntityProcessor(XML_Parser parser,
static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,
- const char *s,
- const char *end,
- const char **nextPtr)
+ const char *UNUSED_P(s),
+ const char *UNUSED_P(end),
+ const char **UNUSED_P(nextPtr))
{
return errorCode;
}
@@ -5329,6 +5380,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end)
{
if (MUST_CONVERT(enc, s)) {
+ enum XML_Convert_Result convert_res;
const char **eventPP;
const char **eventEndPP;
if (enc == encoding) {
@@ -5341,11 +5393,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
}
do {
ICHAR *dataPtr = (ICHAR *)dataBuf;
- XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
+ convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
*eventEndPP = s;
defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
*eventPP = s;
- } while (s != end);
+ } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
}
else
defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
@@ -5475,6 +5527,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc,
return NULL;
id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
sizeof(PREFIX));
+ if (!id->prefix)
+ return NULL;
if (id->prefix->name == poolStart(&dtd->pool))
poolFinish(&dtd->pool);
else
@@ -6148,8 +6202,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc,
if (!pool->ptr && !poolGrow(pool))
return NULL;
for (;;) {
- XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
- if (ptr == end)
+ const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
+ if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
if (!poolGrow(pool))
return NULL;
@@ -6233,8 +6287,13 @@ poolGrow(STRING_POOL *pool)
}
}
if (pool->blocks && pool->start == pool->blocks->s) {
- int blockSize = (int)(pool->end - pool->start)*2;
- BLOCK *temp = (BLOCK *)
+ BLOCK *temp;
+ int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
+
+ if (blockSize < 0)
+ return XML_FALSE;
+
+ temp = (BLOCK *)
pool->mem->realloc_fcn(pool->blocks,
(offsetof(BLOCK, s)
+ blockSize * sizeof(XML_Char)));
@@ -6249,6 +6308,10 @@ poolGrow(STRING_POOL *pool)
else {
BLOCK *tem;
int blockSize = (int)(pool->end - pool->start);
+
+ if (blockSize < 0)
+ return XML_FALSE;
+
if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE;
else
diff --git a/chromium/third_party/expat/files/lib/xmlrole.c b/chromium/third_party/expat/files/lib/xmlrole.c
index 44772e21dd3..fcd0dc6948f 100644
--- a/chromium/third_party/expat/files/lib/xmlrole.c
+++ b/chromium/third_party/expat/files/lib/xmlrole.c
@@ -4,7 +4,7 @@
#include <stddef.h>
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "expat_external.h"
#include "internal.h"
@@ -195,9 +195,9 @@ prolog1(PROLOG_STATE *state,
static int PTRCALL
prolog2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -216,9 +216,9 @@ prolog2(PROLOG_STATE *state,
static int PTRCALL
doctype0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -264,9 +264,9 @@ doctype1(PROLOG_STATE *state,
static int PTRCALL
doctype2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -281,9 +281,9 @@ doctype2(PROLOG_STATE *state,
static int PTRCALL
doctype3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -298,9 +298,9 @@ doctype3(PROLOG_STATE *state,
static int PTRCALL
doctype4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -318,9 +318,9 @@ doctype4(PROLOG_STATE *state,
static int PTRCALL
doctype5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -437,9 +437,9 @@ externalSubset1(PROLOG_STATE *state,
static int PTRCALL
entity0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -457,9 +457,9 @@ entity0(PROLOG_STATE *state,
static int PTRCALL
entity1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -502,9 +502,9 @@ entity2(PROLOG_STATE *state,
static int PTRCALL
entity3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -519,9 +519,9 @@ entity3(PROLOG_STATE *state,
static int PTRCALL
entity4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -559,9 +559,9 @@ entity5(PROLOG_STATE *state,
static int PTRCALL
entity6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -605,9 +605,9 @@ entity7(PROLOG_STATE *state,
static int PTRCALL
entity8(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -622,9 +622,9 @@ entity8(PROLOG_STATE *state,
static int PTRCALL
entity9(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -639,9 +639,9 @@ entity9(PROLOG_STATE *state,
static int PTRCALL
entity10(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -656,9 +656,9 @@ entity10(PROLOG_STATE *state,
static int PTRCALL
notation0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -697,9 +697,9 @@ notation1(PROLOG_STATE *state,
static int PTRCALL
notation2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -714,9 +714,9 @@ notation2(PROLOG_STATE *state,
static int PTRCALL
notation3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -732,9 +732,9 @@ notation3(PROLOG_STATE *state,
static int PTRCALL
notation4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -753,9 +753,9 @@ notation4(PROLOG_STATE *state,
static int PTRCALL
attlist0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -771,9 +771,9 @@ attlist0(PROLOG_STATE *state,
static int PTRCALL
attlist1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -833,9 +833,9 @@ attlist2(PROLOG_STATE *state,
static int PTRCALL
attlist3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -852,9 +852,9 @@ attlist3(PROLOG_STATE *state,
static int PTRCALL
attlist4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -872,9 +872,9 @@ attlist4(PROLOG_STATE *state,
static int PTRCALL
attlist5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -889,9 +889,9 @@ attlist5(PROLOG_STATE *state,
static int PTRCALL
attlist6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -906,9 +906,9 @@ attlist6(PROLOG_STATE *state,
static int PTRCALL
attlist7(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -967,9 +967,9 @@ attlist8(PROLOG_STATE *state,
static int PTRCALL
attlist9(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -984,9 +984,9 @@ attlist9(PROLOG_STATE *state,
static int PTRCALL
element0(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1072,9 +1072,9 @@ element2(PROLOG_STATE *state,
static int PTRCALL
element3(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1097,9 +1097,9 @@ element3(PROLOG_STATE *state,
static int PTRCALL
element4(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1115,9 +1115,9 @@ element4(PROLOG_STATE *state,
static int PTRCALL
element5(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1136,9 +1136,9 @@ element5(PROLOG_STATE *state,
static int PTRCALL
element6(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1166,9 +1166,9 @@ element6(PROLOG_STATE *state,
static int PTRCALL
element7(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1240,9 +1240,9 @@ condSect0(PROLOG_STATE *state,
static int PTRCALL
condSect1(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1258,9 +1258,9 @@ condSect1(PROLOG_STATE *state,
static int PTRCALL
condSect2(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1277,9 +1277,9 @@ condSect2(PROLOG_STATE *state,
static int PTRCALL
declClose(PROLOG_STATE *state,
int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
switch (tok) {
case XML_TOK_PROLOG_S:
@@ -1292,11 +1292,11 @@ declClose(PROLOG_STATE *state,
}
static int PTRCALL
-error(PROLOG_STATE *state,
- int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+error(PROLOG_STATE *UNUSED_P(state),
+ int UNUSED_P(tok),
+ const char *UNUSED_P(ptr),
+ const char *UNUSED_P(end),
+ const ENCODING *UNUSED_P(enc))
{
return XML_ROLE_NONE;
}
diff --git a/chromium/third_party/expat/files/lib/xmltok.c b/chromium/third_party/expat/files/lib/xmltok.c
index bf09dfc72b9..b014e72ce16 100644
--- a/chromium/third_party/expat/files/lib/xmltok.c
+++ b/chromium/third_party/expat/files/lib/xmltok.c
@@ -4,7 +4,7 @@
#include <stddef.h>
-#ifdef COMPILED_FROM_DSP
+#ifdef WIN32
#include "winconfig.h"
#elif defined(MACOS_CLASSIC)
#include "macconfig.h"
@@ -16,7 +16,7 @@
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef WIN32 */
#include "expat_external.h"
#include "internal.h"
@@ -46,7 +46,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -56,7 +56,7 @@
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
- & (1 << (((byte)[1]) & 0x1F)))
+ & (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits between
the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
@@ -69,7 +69,7 @@
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
- & (1 << (((byte)[2]) & 0x1F)))
+ & (1u << (((byte)[2]) & 0x1F)))
#define UTF8_GET_NAMING(pages, p, n) \
((n) == 2 \
@@ -122,19 +122,19 @@
((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
static int PTRFASTCALL
-isNever(const ENCODING *enc, const char *p)
+isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{
return 0;
}
static int PTRFASTCALL
-utf8_isName2(const ENCODING *enc, const char *p)
+utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isName3(const ENCODING *enc, const char *p)
+utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}
@@ -142,13 +142,13 @@ utf8_isName3(const ENCODING *enc, const char *p)
#define utf8_isName4 isNever
static int PTRFASTCALL
-utf8_isNmstrt2(const ENCODING *enc, const char *p)
+utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isNmstrt3(const ENCODING *enc, const char *p)
+utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}
@@ -156,19 +156,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define utf8_isNmstrt4 isNever
static int PTRFASTCALL
-utf8_isInvalid2(const ENCODING *enc, const char *p)
+utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID2((const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isInvalid3(const ENCODING *enc, const char *p)
+utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID3((const unsigned char *)p);
}
static int PTRFASTCALL
-utf8_isInvalid4(const ENCODING *enc, const char *p)
+utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{
return UTF8_INVALID4((const unsigned char *)p);
}
@@ -222,6 +222,17 @@ struct normal_encoding {
E ## isInvalid3, \
E ## isInvalid4
+#define NULL_VTABLE \
+ /* isName2 */ NULL, \
+ /* isName3 */ NULL, \
+ /* isName4 */ NULL, \
+ /* isNmstrt2 */ NULL, \
+ /* isNmstrt3 */ NULL, \
+ /* isNmstrt4 */ NULL, \
+ /* isInvalid2 */ NULL, \
+ /* isInvalid3 */ NULL, \
+ /* isInvalid4 */ NULL
+
static int FASTCALL checkCharRefNumber(int);
#include "xmltok_impl.h"
@@ -318,39 +329,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0
};
-static void PTRCALL
-utf8_toUtf8(const ENCODING *enc,
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+{
+ const char * fromLim = *fromLimRef;
+ size_t walked = 0;
+ for (; fromLim > from; fromLim--, walked++) {
+ const unsigned char prev = (unsigned char)fromLim[-1];
+ if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
+ if (walked + 1 >= 4) {
+ fromLim += 4 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
+ if (walked + 1 >= 3) {
+ fromLim += 3 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
+ if (walked + 1 >= 2) {
+ fromLim += 2 - 1;
+ break;
+ } else {
+ walked = 0;
+ }
+ } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
+ break;
+ }
+ }
+ *fromLimRef = fromLim;
+}
+
+static enum XML_Convert_Result PTRCALL
+utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
char *to;
const char *from;
if (fromLim - *fromP > toLim - *toP) {
/* Avoid copying partial characters. */
- for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
- if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
- break;
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
+ fromLim = *fromP + (toLim - *toP);
+ align_limit_to_full_utf8_characters(*fromP, &fromLim);
}
- for (to = *toP, from = *fromP; from != fromLim; from++, to++)
+ for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from;
*fromP = from;
*toP = to;
+
+ if ((to == toLim) && (from < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return res;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP;
const char *from = *fromP;
- while (from != fromLim && to != toLim) {
+ while (from < fromLim && to < toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2:
+ if (fromLim - from < 2) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ goto after;
+ }
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2;
break;
case BT_LEAD3:
+ if (fromLim - from < 3) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ goto after;
+ }
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3;
@@ -358,8 +419,14 @@ utf8_toUtf16(const ENCODING *enc,
case BT_LEAD4:
{
unsigned long n;
- if (to + 1 == toLim)
+ if (toLim - to < 2) {
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
+ goto after;
+ }
+ if (fromLim - from < 4) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
+ }
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
@@ -374,9 +441,12 @@ utf8_toUtf16(const ENCODING *enc,
break;
}
}
+ if (from < fromLim)
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
after:
*fromP = from;
*toP = to;
+ return res;
}
#ifdef XML_NS
@@ -425,38 +495,43 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
-static void PTRCALL
-latin1_toUtf8(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
for (;;) {
unsigned char c;
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
c = (unsigned char)**fromP;
if (c & 0x80) {
if (toLim - *toP < 2)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++;
}
else {
if (*toP == toLim)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++;
}
}
}
-static void PTRCALL
-latin1_toUtf16(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
#ifdef XML_NS
@@ -467,7 +542,7 @@ static const struct normal_encoding latin1_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
#endif
@@ -480,16 +555,21 @@ static const struct normal_encoding latin1_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
-static void PTRCALL
-ascii_toUtf8(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
#ifdef XML_NS
@@ -500,7 +580,7 @@ static const struct normal_encoding ascii_encoding_ns = {
#include "asciitab.h"
/* BT_NONXML == 0 */
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
#endif
@@ -513,7 +593,7 @@ static const struct normal_encoding ascii_encoding = {
#undef BT_COLON
/* BT_NONXML == 0 */
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
static int PTRFASTCALL
@@ -536,13 +616,14 @@ unicode_byte_type(char hi, char lo)
}
#define DEFINE_UTF16_TO_UTF8(E) \
-static void PTRCALL \
-E ## toUtf8(const ENCODING *enc, \
+static enum XML_Convert_Result PTRCALL \
+E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
- const char *from; \
- for (from = *fromP; from != fromLim; from += 2) { \
+ const char *from = *fromP; \
+ fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
+ for (; from < fromLim; from += 2) { \
int plane; \
unsigned char lo2; \
unsigned char lo = GET_LO(from); \
@@ -552,7 +633,7 @@ E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = lo; \
break; \
@@ -562,7 +643,7 @@ E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
@@ -570,7 +651,7 @@ E ## toUtf8(const ENCODING *enc, \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
@@ -580,7 +661,11 @@ E ## toUtf8(const ENCODING *enc, \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ } \
+ if (fromLim - from < 4) { \
+ *fromP = from; \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
@@ -596,20 +681,32 @@ E ## toUtf8(const ENCODING *enc, \
} \
} \
*fromP = from; \
+ if (from < fromLim) \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
+ else \
+ return XML_CONVERT_COMPLETED; \
}
#define DEFINE_UTF16_TO_UTF16(E) \
-static void PTRCALL \
-E ## toUtf16(const ENCODING *enc, \
+static enum XML_Convert_Result PTRCALL \
+E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
+ fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
- && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
+ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \
- for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
+ res = XML_CONVERT_INPUT_INCOMPLETE; \
+ } \
+ for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
+ if ((*toP == toLim) && (*fromP < fromLim)) \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ else \
+ return res; \
}
#define SET2(ptr, ch) \
@@ -726,7 +823,7 @@ static const struct normal_encoding little2_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -745,7 +842,7 @@ static const struct normal_encoding little2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#if BYTEORDER != 4321
@@ -758,7 +855,7 @@ static const struct normal_encoding internal_little2_encoding_ns = {
#include "iasciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -771,7 +868,7 @@ static const struct normal_encoding internal_little2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
#endif
@@ -867,7 +964,7 @@ static const struct normal_encoding big2_encoding_ns = {
#include "asciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -886,7 +983,7 @@ static const struct normal_encoding big2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#if BYTEORDER != 1234
@@ -899,7 +996,7 @@ static const struct normal_encoding internal_big2_encoding_ns = {
#include "iasciitab.h"
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -912,7 +1009,7 @@ static const struct normal_encoding internal_big2_encoding = {
#undef BT_COLON
#include "latin1tab.h"
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
#endif
@@ -938,7 +1035,7 @@ streqci(const char *s1, const char *s2)
}
static void PTRCALL
-initUpdatePosition(const ENCODING *enc, const char *ptr,
+initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos)
{
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
@@ -1288,7 +1385,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
@@ -1299,21 +1396,21 @@ unknown_toUtf8(const ENCODING *enc,
const char *utf8;
int n;
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++;
if (n == 0) {
int c = uenc->convert(uenc->userData, *fromP);
n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else {
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++;
}
do {
@@ -1322,13 +1419,13 @@ unknown_toUtf8(const ENCODING *enc,
}
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
- while (*fromP != fromLim && *toP != toLim) {
+ while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) {
c = (unsigned short)
@@ -1340,6 +1437,11 @@ unknown_toUtf16(const ENCODING *enc,
(*fromP)++;
*(*toP)++ = c;
}
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
ENCODING *
@@ -1503,7 +1605,7 @@ initScan(const ENCODING * const *encodingTable,
{
const ENCODING **encPtr;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
encPtr = enc->encPtr;
if (ptr + 1 == end) {
diff --git a/chromium/third_party/expat/files/lib/xmltok.h b/chromium/third_party/expat/files/lib/xmltok.h
index ca867aa6b42..752007e8b9e 100644
--- a/chromium/third_party/expat/files/lib/xmltok.h
+++ b/chromium/third_party/expat/files/lib/xmltok.h
@@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *,
const char *,
const char **);
+enum XML_Convert_Result {
+ XML_CONVERT_COMPLETED = 0,
+ XML_CONVERT_INPUT_INCOMPLETE = 1,
+ XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */
+};
+
struct encoding {
SCANNER scanners[XML_N_STATES];
SCANNER literalScanners[XML_N_LITERAL_TYPES];
@@ -158,12 +164,12 @@ struct encoding {
const char *ptr,
const char *end,
const char **badPtr);
- void (PTRCALL *utf8Convert)(const ENCODING *enc,
+ enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
char **toP,
const char *toLim);
- void (PTRCALL *utf16Convert)(const ENCODING *enc,
+ enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc,
const char **fromP,
const char *fromLim,
unsigned short **toP,
diff --git a/chromium/third_party/expat/files/lib/xmltok_impl.c b/chromium/third_party/expat/files/lib/xmltok_impl.c
index 9c2895b8773..5f779c0571b 100644
--- a/chromium/third_party/expat/files/lib/xmltok_impl.c
+++ b/chromium/third_party/expat/files/lib/xmltok_impl.c
@@ -87,27 +87,45 @@
#define PREFIX(ident) ident
#endif
+
+#define HAS_CHARS(enc, ptr, end, count) \
+ (end - ptr >= count * MINBPC(enc))
+
+#define HAS_CHAR(enc, ptr, end) \
+ HAS_CHARS(enc, ptr, end, 1)
+
+#define REQUIRE_CHARS(enc, ptr, end, count) \
+ { \
+ if (! HAS_CHARS(enc, ptr, end, count)) { \
+ return XML_TOK_PARTIAL; \
+ } \
+ }
+
+#define REQUIRE_CHAR(enc, ptr, end) \
+ REQUIRE_CHARS(enc, ptr, end, 1)
+
+
/* ptr points to character following "<!-" */
static int PTRCALL
PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr != end) {
+ if (HAS_CHAR(enc, ptr, end)) {
if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_MINUS:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -131,8 +149,7 @@ static int PTRCALL
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -147,11 +164,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_PERCNT:
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 2);
/* don't allow <!ENTITY% foo "whatever"> */
switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
@@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
-PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
+PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, int *tokPtr)
{
int upper = 0;
@@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
{
int tok;
const char *target = ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
@@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_QUEST:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
@@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
return XML_TOK_INVALID;
}
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr + MINBPC(enc);
return tok;
@@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
}
static int PTRCALL
-PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
+PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, const char **nextTokPtr)
{
static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
ASCII_T, ASCII_A, ASCII_LSQB };
int i;
/* CDATA[ */
- if (end - ptr < 6 * MINBPC(enc))
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 6);
for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
*nextTokPtr = ptr;
@@ -305,7 +317,7 @@ static int PTRCALL
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
@@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
switch (BYTE_TYPE(enc, ptr)) {
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
break;
@@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
return XML_TOK_CDATA_SECT_CLOSE;
case BT_CR:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
*nextTokPtr = ptr;
@@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
ptr += MINBPC(enc);
break;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -383,19 +392,18 @@ static int PTRCALL
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_CR: case BT_LF:
break;
@@ -432,7 +440,7 @@ static int PTRCALL
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr != end) {
+ if (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -464,7 +472,7 @@ static int PTRCALL
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
- if (ptr != end) {
+ if (HAS_CHAR(enc, ptr, end)) {
if (CHAR_MATCHES(enc, ptr, ASCII_x))
return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
switch (BYTE_TYPE(enc, ptr)) {
@@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+ for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
break;
@@ -496,8 +504,7 @@ static int PTRCALL
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_NUM:
@@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
@@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
int hadColon = 0;
#endif
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
@@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
hadColon = 1;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
@@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
int t;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == BT_EQUALS)
break;
@@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
#endif
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
open = BYTE_TYPE(enc, ptr);
if (open == BT_QUOT || open == BT_APOS)
break;
@@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* in attribute value */
for (;;) {
int t;
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
t = BYTE_TYPE(enc, ptr);
if (t == open)
break;
@@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
}
}
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_S:
case BT_CR:
@@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
/* ptr points to closing quote */
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_CR: case BT_LF:
@@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL:
sol:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
#ifdef XML_NS
int hadColon;
#endif
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_EXCL:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
hadColon = 0;
#endif
/* we have a start-tag */
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
#ifdef XML_NS
@@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
}
hadColon = 1;
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
@@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_CR: case BT_LF:
{
ptr += MINBPC(enc);
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_GT:
@@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
case BT_SOL:
sol:
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
@@ -785,7 +782,7 @@ static int PTRCALL
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
@@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
case BT_CR:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_DATA_NEWLINE;
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
break;
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_RSQB;
if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr -= MINBPC(enc);
@@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
break;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
case BT_RSQB:
- if (ptr + MINBPC(enc) != end) {
+ if (HAS_CHARS(enc, ptr, end, 2)) {
if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
ptr += MINBPC(enc);
break;
}
- if (ptr + 2*MINBPC(enc) != end) {
+ if (HAS_CHARS(enc, ptr, end, 3)) {
if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
ptr += MINBPC(enc);
break;
@@ -884,8 +881,7 @@ static int PTRCALL
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_SEMI:
@@ -913,15 +909,14 @@ static int PTRCALL
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_CR: case BT_LF: case BT_S:
@@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
const char *ptr, const char *end,
const char **nextTokPtr)
{
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
int t = BYTE_TYPE(enc, ptr);
switch (t) {
INVALID_CASES(ptr, nextTokPtr)
@@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc,
ptr += MINBPC(enc);
if (t != open)
break;
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_LITERAL;
*nextTokPtr = ptr;
switch (BYTE_TYPE(enc, ptr)) {
@@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
const char **nextTokPtr)
{
int tok;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
if (MINBPC(enc) > 1) {
size_t n = end - ptr;
@@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LT:
{
ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
switch (BYTE_TYPE(enc, ptr)) {
case BT_EXCL:
return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_S: case BT_LF:
for (;;) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
break;
switch (BYTE_TYPE(enc, ptr)) {
case BT_S: case BT_LF:
@@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_BRACKET;
case BT_RSQB:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_BRACKET;
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHARS(enc, ptr, end, 2);
if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
*nextTokPtr = ptr + 2*MINBPC(enc);
return XML_TOK_COND_SECT_CLOSE;
@@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
return XML_TOK_OPEN_PAREN;
case BT_RPAR:
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return -XML_TOK_CLOSE_PAREN;
switch (BYTE_TYPE(enc, ptr)) {
case BT_AST:
@@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
case BT_GT: case BT_RPAR: case BT_COMMA:
@@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
switch (tok) {
case XML_TOK_NAME:
- if (ptr == end)
- return XML_TOK_PARTIAL;
+ REQUIRE_CHAR(enc, ptr, end);
tok = XML_TOK_PREFIXED_NAME;
switch (BYTE_TYPE(enc, ptr)) {
CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
@@ -1204,10 +1196,12 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
const char *start;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
+ else if (! HAS_CHAR(enc, ptr, end))
+ return XML_TOK_PARTIAL;
start = ptr;
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
@@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -1262,10 +1256,12 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *end, const char **nextTokPtr)
{
const char *start;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
+ else if (! HAS_CHAR(enc, ptr, end))
+ return XML_TOK_PARTIAL;
start = ptr;
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: ptr += n; break;
@@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
case BT_CR:
if (ptr == start) {
ptr += MINBPC(enc);
- if (ptr == end)
+ if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_TRAILING_CR;
if (BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
@@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
end = ptr + n;
}
}
- while (ptr != end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
INVALID_CASES(ptr, nextTokPtr)
case BT_LT:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
++level;
ptr += MINBPC(enc);
@@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
}
break;
case BT_RSQB:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
+ ptr += MINBPC(enc);
+ REQUIRE_CHAR(enc, ptr, end);
if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
ptr += MINBPC(enc);
if (level == 0) {
@@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
{
ptr += MINBPC(enc);
end -= MINBPC(enc);
- for (; ptr != end; ptr += MINBPC(enc)) {
+ for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
switch (BYTE_TYPE(enc, ptr)) {
case BT_DIGIT:
case BT_HEX:
@@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
}
static int PTRFASTCALL
-PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
+PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
{
int result = 0;
/* skip &# */
@@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
}
static int PTRCALL
-PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
+PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end)
{
switch ((end - ptr)/MINBPC(enc)) {
@@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
}
static int PTRCALL
-PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
+PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2)
{
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
- if (ptr1 == end1)
+ if (end1 - ptr1 < MINBPC(enc))
return 0;
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0;
@@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
const char *end,
POSITION *pos)
{
- while (ptr < end) {
+ while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
#define LEAD_CASE(n) \
case BT_LEAD ## n: \
@@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc,
case BT_CR:
pos->lineNumber++;
ptr += MINBPC(enc);
- if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
+ if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
pos->columnNumber = (XML_Size)-1;
break;
diff --git a/chromium/third_party/expat/files/lib/xmltok_impl.c.original b/chromium/third_party/expat/files/lib/xmltok_impl.c.original
deleted file mode 100644
index 9c2895b8773..00000000000
--- a/chromium/third_party/expat/files/lib/xmltok_impl.c.original
+++ /dev/null
@@ -1,1783 +0,0 @@
-/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
- See the file COPYING for copying permission.
-*/
-
-/* This file is included! */
-#ifdef XML_TOK_IMPL_C
-
-#ifndef IS_INVALID_CHAR
-#define IS_INVALID_CHAR(enc, ptr, n) (0)
-#endif
-
-#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
- case BT_LEAD ## n: \
- if (end - ptr < n) \
- return XML_TOK_PARTIAL_CHAR; \
- if (IS_INVALID_CHAR(enc, ptr, n)) { \
- *(nextTokPtr) = (ptr); \
- return XML_TOK_INVALID; \
- } \
- ptr += n; \
- break;
-
-#define INVALID_CASES(ptr, nextTokPtr) \
- INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
- INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
- INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
- case BT_NONXML: \
- case BT_MALFORM: \
- case BT_TRAIL: \
- *(nextTokPtr) = (ptr); \
- return XML_TOK_INVALID;
-
-#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
- case BT_LEAD ## n: \
- if (end - ptr < n) \
- return XML_TOK_PARTIAL_CHAR; \
- if (!IS_NAME_CHAR(enc, ptr, n)) { \
- *nextTokPtr = ptr; \
- return XML_TOK_INVALID; \
- } \
- ptr += n; \
- break;
-
-#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
- case BT_NONASCII: \
- if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
- *nextTokPtr = ptr; \
- return XML_TOK_INVALID; \
- } \
- case BT_NMSTRT: \
- case BT_HEX: \
- case BT_DIGIT: \
- case BT_NAME: \
- case BT_MINUS: \
- ptr += MINBPC(enc); \
- break; \
- CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
- CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
- CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
-
-#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
- case BT_LEAD ## n: \
- if (end - ptr < n) \
- return XML_TOK_PARTIAL_CHAR; \
- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
- *nextTokPtr = ptr; \
- return XML_TOK_INVALID; \
- } \
- ptr += n; \
- break;
-
-#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
- case BT_NONASCII: \
- if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
- *nextTokPtr = ptr; \
- return XML_TOK_INVALID; \
- } \
- case BT_NMSTRT: \
- case BT_HEX: \
- ptr += MINBPC(enc); \
- break; \
- CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
- CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
- CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
-
-#ifndef PREFIX
-#define PREFIX(ident) ident
-#endif
-
-/* ptr points to character following "<!-" */
-
-static int PTRCALL
-PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- if (ptr != end) {
- if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- ptr += MINBPC(enc);
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- INVALID_CASES(ptr, nextTokPtr)
- case BT_MINUS:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_COMMENT;
- }
- break;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "<!" */
-
-static int PTRCALL
-PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_MINUS:
- return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_LSQB:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_COND_SECT_OPEN;
- case BT_NMSTRT:
- case BT_HEX:
- ptr += MINBPC(enc);
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_PERCNT:
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
- /* don't allow <!ENTITY% foo "whatever"> */
- switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
- case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- /* fall through */
- case BT_S: case BT_CR: case BT_LF:
- *nextTokPtr = ptr;
- return XML_TOK_DECL_OPEN;
- case BT_NMSTRT:
- case BT_HEX:
- ptr += MINBPC(enc);
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-static int PTRCALL
-PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
- const char *end, int *tokPtr)
-{
- int upper = 0;
- *tokPtr = XML_TOK_PI;
- if (end - ptr != MINBPC(enc)*3)
- return 1;
- switch (BYTE_TO_ASCII(enc, ptr)) {
- case ASCII_x:
- break;
- case ASCII_X:
- upper = 1;
- break;
- default:
- return 1;
- }
- ptr += MINBPC(enc);
- switch (BYTE_TO_ASCII(enc, ptr)) {
- case ASCII_m:
- break;
- case ASCII_M:
- upper = 1;
- break;
- default:
- return 1;
- }
- ptr += MINBPC(enc);
- switch (BYTE_TO_ASCII(enc, ptr)) {
- case ASCII_l:
- break;
- case ASCII_L:
- upper = 1;
- break;
- default:
- return 1;
- }
- if (upper)
- return 0;
- *tokPtr = XML_TOK_XML_DECL;
- return 1;
-}
-
-/* ptr points to character following "<?" */
-
-static int PTRCALL
-PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- int tok;
- const char *target = ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- case BT_S: case BT_CR: case BT_LF:
- if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- ptr += MINBPC(enc);
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- INVALID_CASES(ptr, nextTokPtr)
- case BT_QUEST:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- *nextTokPtr = ptr + MINBPC(enc);
- return tok;
- }
- break;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- return XML_TOK_PARTIAL;
- case BT_QUEST:
- if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- *nextTokPtr = ptr + MINBPC(enc);
- return tok;
- }
- /* fall through */
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-static int PTRCALL
-PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
- ASCII_T, ASCII_A, ASCII_LSQB };
- int i;
- /* CDATA[ */
- if (end - ptr < 6 * MINBPC(enc))
- return XML_TOK_PARTIAL;
- for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
- if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_CDATA_SECT_OPEN;
-}
-
-static int PTRCALL
-PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_NONE;
- if (MINBPC(enc) > 1) {
- size_t n = end - ptr;
- if (n & (MINBPC(enc) - 1)) {
- n &= ~(MINBPC(enc) - 1);
- if (n == 0)
- return XML_TOK_PARTIAL;
- end = ptr + n;
- }
- }
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_RSQB:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
- break;
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- ptr -= MINBPC(enc);
- break;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_CDATA_SECT_CLOSE;
- case BT_CR:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (BYTE_TYPE(enc, ptr) == BT_LF)
- ptr += MINBPC(enc);
- *nextTokPtr = ptr;
- return XML_TOK_DATA_NEWLINE;
- case BT_LF:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_DATA_NEWLINE;
- INVALID_CASES(ptr, nextTokPtr)
- default:
- ptr += MINBPC(enc);
- break;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: \
- if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
- *nextTokPtr = ptr; \
- return XML_TOK_DATA_CHARS; \
- } \
- ptr += n; \
- break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_NONXML:
- case BT_MALFORM:
- case BT_TRAIL:
- case BT_CR:
- case BT_LF:
- case BT_RSQB:
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
-}
-
-/* ptr points to character following "</" */
-
-static int PTRCALL
-PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- case BT_S: case BT_CR: case BT_LF:
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_S: case BT_CR: case BT_LF:
- break;
- case BT_GT:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_END_TAG;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-#ifdef XML_NS
- case BT_COLON:
- /* no need to check qname syntax here,
- since end-tag must match exactly */
- ptr += MINBPC(enc);
- break;
-#endif
- case BT_GT:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_END_TAG;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "&#X" */
-
-static int PTRCALL
-PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- if (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_DIGIT:
- case BT_HEX:
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_DIGIT:
- case BT_HEX:
- break;
- case BT_SEMI:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_CHAR_REF;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "&#" */
-
-static int PTRCALL
-PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- if (ptr != end) {
- if (CHAR_MATCHES(enc, ptr, ASCII_x))
- return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_DIGIT:
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_DIGIT:
- break;
- case BT_SEMI:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_CHAR_REF;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "&" */
-
-static int PTRCALL
-PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- case BT_NUM:
- return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- case BT_SEMI:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_ENTITY_REF;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following first character of attribute name */
-
-static int PTRCALL
-PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
-#ifdef XML_NS
- int hadColon = 0;
-#endif
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
-#ifdef XML_NS
- case BT_COLON:
- if (hadColon) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- hadColon = 1;
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- break;
-#endif
- case BT_S: case BT_CR: case BT_LF:
- for (;;) {
- int t;
-
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- t = BYTE_TYPE(enc, ptr);
- if (t == BT_EQUALS)
- break;
- switch (t) {
- case BT_S:
- case BT_LF:
- case BT_CR:
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- /* fall through */
- case BT_EQUALS:
- {
- int open;
-#ifdef XML_NS
- hadColon = 0;
-#endif
- for (;;) {
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- open = BYTE_TYPE(enc, ptr);
- if (open == BT_QUOT || open == BT_APOS)
- break;
- switch (open) {
- case BT_S:
- case BT_LF:
- case BT_CR:
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- ptr += MINBPC(enc);
- /* in attribute value */
- for (;;) {
- int t;
- if (ptr == end)
- return XML_TOK_PARTIAL;
- t = BYTE_TYPE(enc, ptr);
- if (t == open)
- break;
- switch (t) {
- INVALID_CASES(ptr, nextTokPtr)
- case BT_AMP:
- {
- int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
- if (tok <= 0) {
- if (tok == XML_TOK_INVALID)
- *nextTokPtr = ptr;
- return tok;
- }
- break;
- }
- case BT_LT:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_S:
- case BT_CR:
- case BT_LF:
- break;
- case BT_SOL:
- goto sol;
- case BT_GT:
- goto gt;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- /* ptr points to closing quote */
- for (;;) {
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- case BT_S: case BT_CR: case BT_LF:
- continue;
- case BT_GT:
- gt:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_START_TAG_WITH_ATTS;
- case BT_SOL:
- sol:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- break;
- }
- break;
- }
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "<" */
-
-static int PTRCALL
-PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
-#ifdef XML_NS
- int hadColon;
-#endif
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- case BT_EXCL:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_MINUS:
- return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_LSQB:
- return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
- end, nextTokPtr);
- }
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case BT_QUEST:
- return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_SOL:
- return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
-#ifdef XML_NS
- hadColon = 0;
-#endif
- /* we have a start-tag */
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
-#ifdef XML_NS
- case BT_COLON:
- if (hadColon) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- hadColon = 1;
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- break;
-#endif
- case BT_S: case BT_CR: case BT_LF:
- {
- ptr += MINBPC(enc);
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- case BT_GT:
- goto gt;
- case BT_SOL:
- goto sol;
- case BT_S: case BT_CR: case BT_LF:
- ptr += MINBPC(enc);
- continue;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
- }
- return XML_TOK_PARTIAL;
- }
- case BT_GT:
- gt:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_START_TAG_NO_ATTS;
- case BT_SOL:
- sol:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-static int PTRCALL
-PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_NONE;
- if (MINBPC(enc) > 1) {
- size_t n = end - ptr;
- if (n & (MINBPC(enc) - 1)) {
- n &= ~(MINBPC(enc) - 1);
- if (n == 0)
- return XML_TOK_PARTIAL;
- end = ptr + n;
- }
- }
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_LT:
- return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_AMP:
- return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_CR:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_TRAILING_CR;
- if (BYTE_TYPE(enc, ptr) == BT_LF)
- ptr += MINBPC(enc);
- *nextTokPtr = ptr;
- return XML_TOK_DATA_NEWLINE;
- case BT_LF:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_DATA_NEWLINE;
- case BT_RSQB:
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_TRAILING_RSQB;
- if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
- break;
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_TRAILING_RSQB;
- if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- ptr -= MINBPC(enc);
- break;
- }
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- INVALID_CASES(ptr, nextTokPtr)
- default:
- ptr += MINBPC(enc);
- break;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: \
- if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
- *nextTokPtr = ptr; \
- return XML_TOK_DATA_CHARS; \
- } \
- ptr += n; \
- break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_RSQB:
- if (ptr + MINBPC(enc) != end) {
- if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
- ptr += MINBPC(enc);
- break;
- }
- if (ptr + 2*MINBPC(enc) != end) {
- if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
- ptr += MINBPC(enc);
- break;
- }
- *nextTokPtr = ptr + 2*MINBPC(enc);
- return XML_TOK_INVALID;
- }
- }
- /* fall through */
- case BT_AMP:
- case BT_LT:
- case BT_NONXML:
- case BT_MALFORM:
- case BT_TRAIL:
- case BT_CR:
- case BT_LF:
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
-}
-
-/* ptr points to character following "%" */
-
-static int PTRCALL
-PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
- *nextTokPtr = ptr;
- return XML_TOK_PERCENT;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- case BT_SEMI:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_PARAM_ENTITY_REF;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-static int PTRCALL
-PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- case BT_CR: case BT_LF: case BT_S:
- case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
- *nextTokPtr = ptr;
- return XML_TOK_POUND_NAME;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return -XML_TOK_POUND_NAME;
-}
-
-static int PTRCALL
-PREFIX(scanLit)(int open, const ENCODING *enc,
- const char *ptr, const char *end,
- const char **nextTokPtr)
-{
- while (ptr != end) {
- int t = BYTE_TYPE(enc, ptr);
- switch (t) {
- INVALID_CASES(ptr, nextTokPtr)
- case BT_QUOT:
- case BT_APOS:
- ptr += MINBPC(enc);
- if (t != open)
- break;
- if (ptr == end)
- return -XML_TOK_LITERAL;
- *nextTokPtr = ptr;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_S: case BT_CR: case BT_LF:
- case BT_GT: case BT_PERCNT: case BT_LSQB:
- return XML_TOK_LITERAL;
- default:
- return XML_TOK_INVALID;
- }
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-static int PTRCALL
-PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
- const char **nextTokPtr)
-{
- int tok;
- if (ptr == end)
- return XML_TOK_NONE;
- if (MINBPC(enc) > 1) {
- size_t n = end - ptr;
- if (n & (MINBPC(enc) - 1)) {
- n &= ~(MINBPC(enc) - 1);
- if (n == 0)
- return XML_TOK_PARTIAL;
- end = ptr + n;
- }
- }
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_QUOT:
- return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_APOS:
- return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_LT:
- {
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_EXCL:
- return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_QUEST:
- return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_NMSTRT:
- case BT_HEX:
- case BT_NONASCII:
- case BT_LEAD2:
- case BT_LEAD3:
- case BT_LEAD4:
- *nextTokPtr = ptr - MINBPC(enc);
- return XML_TOK_INSTANCE_START;
- }
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- case BT_CR:
- if (ptr + MINBPC(enc) == end) {
- *nextTokPtr = end;
- /* indicate that this might be part of a CR/LF pair */
- return -XML_TOK_PROLOG_S;
- }
- /* fall through */
- case BT_S: case BT_LF:
- for (;;) {
- ptr += MINBPC(enc);
- if (ptr == end)
- break;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_S: case BT_LF:
- break;
- case BT_CR:
- /* don't split CR/LF pair */
- if (ptr + MINBPC(enc) != end)
- break;
- /* fall through */
- default:
- *nextTokPtr = ptr;
- return XML_TOK_PROLOG_S;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_PROLOG_S;
- case BT_PERCNT:
- return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- case BT_COMMA:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_COMMA;
- case BT_LSQB:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_OPEN_BRACKET;
- case BT_RSQB:
- ptr += MINBPC(enc);
- if (ptr == end)
- return -XML_TOK_CLOSE_BRACKET;
- if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if (ptr + MINBPC(enc) == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
- *nextTokPtr = ptr + 2*MINBPC(enc);
- return XML_TOK_COND_SECT_CLOSE;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_CLOSE_BRACKET;
- case BT_LPAR:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_OPEN_PAREN;
- case BT_RPAR:
- ptr += MINBPC(enc);
- if (ptr == end)
- return -XML_TOK_CLOSE_PAREN;
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_AST:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_CLOSE_PAREN_ASTERISK;
- case BT_QUEST:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_CLOSE_PAREN_QUESTION;
- case BT_PLUS:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_CLOSE_PAREN_PLUS;
- case BT_CR: case BT_LF: case BT_S:
- case BT_GT: case BT_COMMA: case BT_VERBAR:
- case BT_RPAR:
- *nextTokPtr = ptr;
- return XML_TOK_CLOSE_PAREN;
- }
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case BT_VERBAR:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_OR;
- case BT_GT:
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_DECL_CLOSE;
- case BT_NUM:
- return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: \
- if (end - ptr < n) \
- return XML_TOK_PARTIAL_CHAR; \
- if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
- ptr += n; \
- tok = XML_TOK_NAME; \
- break; \
- } \
- if (IS_NAME_CHAR(enc, ptr, n)) { \
- ptr += n; \
- tok = XML_TOK_NMTOKEN; \
- break; \
- } \
- *nextTokPtr = ptr; \
- return XML_TOK_INVALID;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_NMSTRT:
- case BT_HEX:
- tok = XML_TOK_NAME;
- ptr += MINBPC(enc);
- break;
- case BT_DIGIT:
- case BT_NAME:
- case BT_MINUS:
-#ifdef XML_NS
- case BT_COLON:
-#endif
- tok = XML_TOK_NMTOKEN;
- ptr += MINBPC(enc);
- break;
- case BT_NONASCII:
- if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
- ptr += MINBPC(enc);
- tok = XML_TOK_NAME;
- break;
- }
- if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
- ptr += MINBPC(enc);
- tok = XML_TOK_NMTOKEN;
- break;
- }
- /* fall through */
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- case BT_GT: case BT_RPAR: case BT_COMMA:
- case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
- case BT_S: case BT_CR: case BT_LF:
- *nextTokPtr = ptr;
- return tok;
-#ifdef XML_NS
- case BT_COLON:
- ptr += MINBPC(enc);
- switch (tok) {
- case XML_TOK_NAME:
- if (ptr == end)
- return XML_TOK_PARTIAL;
- tok = XML_TOK_PREFIXED_NAME;
- switch (BYTE_TYPE(enc, ptr)) {
- CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
- default:
- tok = XML_TOK_NMTOKEN;
- break;
- }
- break;
- case XML_TOK_PREFIXED_NAME:
- tok = XML_TOK_NMTOKEN;
- break;
- }
- break;
-#endif
- case BT_PLUS:
- if (tok == XML_TOK_NMTOKEN) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_NAME_PLUS;
- case BT_AST:
- if (tok == XML_TOK_NMTOKEN) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_NAME_ASTERISK;
- case BT_QUEST:
- if (tok == XML_TOK_NMTOKEN) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_NAME_QUESTION;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- return -tok;
-}
-
-static int PTRCALL
-PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- const char *start;
- if (ptr == end)
- return XML_TOK_NONE;
- start = ptr;
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: ptr += n; break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_AMP:
- if (ptr == start)
- return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- case BT_LT:
- /* this is for inside entity references */
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case BT_LF:
- if (ptr == start) {
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_DATA_NEWLINE;
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- case BT_CR:
- if (ptr == start) {
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_TRAILING_CR;
- if (BYTE_TYPE(enc, ptr) == BT_LF)
- ptr += MINBPC(enc);
- *nextTokPtr = ptr;
- return XML_TOK_DATA_NEWLINE;
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- case BT_S:
- if (ptr == start) {
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_ATTRIBUTE_VALUE_S;
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
-}
-
-static int PTRCALL
-PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- const char *start;
- if (ptr == end)
- return XML_TOK_NONE;
- start = ptr;
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: ptr += n; break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_AMP:
- if (ptr == start)
- return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- case BT_PERCNT:
- if (ptr == start) {
- int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
- end, nextTokPtr);
- return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- case BT_LF:
- if (ptr == start) {
- *nextTokPtr = ptr + MINBPC(enc);
- return XML_TOK_DATA_NEWLINE;
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- case BT_CR:
- if (ptr == start) {
- ptr += MINBPC(enc);
- if (ptr == end)
- return XML_TOK_TRAILING_CR;
- if (BYTE_TYPE(enc, ptr) == BT_LF)
- ptr += MINBPC(enc);
- *nextTokPtr = ptr;
- return XML_TOK_DATA_NEWLINE;
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
-}
-
-#ifdef XML_DTD
-
-static int PTRCALL
-PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
- const char *end, const char **nextTokPtr)
-{
- int level = 0;
- if (MINBPC(enc) > 1) {
- size_t n = end - ptr;
- if (n & (MINBPC(enc) - 1)) {
- n &= ~(MINBPC(enc) - 1);
- end = ptr + n;
- }
- }
- while (ptr != end) {
- switch (BYTE_TYPE(enc, ptr)) {
- INVALID_CASES(ptr, nextTokPtr)
- case BT_LT:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
- ++level;
- ptr += MINBPC(enc);
- }
- }
- break;
- case BT_RSQB:
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
- if ((ptr += MINBPC(enc)) == end)
- return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
- ptr += MINBPC(enc);
- if (level == 0) {
- *nextTokPtr = ptr;
- return XML_TOK_IGNORE_SECT;
- }
- --level;
- }
- }
- break;
- default:
- ptr += MINBPC(enc);
- break;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-#endif /* XML_DTD */
-
-static int PTRCALL
-PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
- const char **badPtr)
-{
- ptr += MINBPC(enc);
- end -= MINBPC(enc);
- for (; ptr != end; ptr += MINBPC(enc)) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_DIGIT:
- case BT_HEX:
- case BT_MINUS:
- case BT_APOS:
- case BT_LPAR:
- case BT_RPAR:
- case BT_PLUS:
- case BT_COMMA:
- case BT_SOL:
- case BT_EQUALS:
- case BT_QUEST:
- case BT_CR:
- case BT_LF:
- case BT_SEMI:
- case BT_EXCL:
- case BT_AST:
- case BT_PERCNT:
- case BT_NUM:
-#ifdef XML_NS
- case BT_COLON:
-#endif
- break;
- case BT_S:
- if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
- *badPtr = ptr;
- return 0;
- }
- break;
- case BT_NAME:
- case BT_NMSTRT:
- if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
- break;
- default:
- switch (BYTE_TO_ASCII(enc, ptr)) {
- case 0x24: /* $ */
- case 0x40: /* @ */
- break;
- default:
- *badPtr = ptr;
- return 0;
- }
- break;
- }
- }
- return 1;
-}
-
-/* This must only be called for a well-formed start-tag or empty
- element tag. Returns the number of attributes. Pointers to the
- first attsMax attributes are stored in atts.
-*/
-
-static int PTRCALL
-PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
- int attsMax, ATTRIBUTE *atts)
-{
- enum { other, inName, inValue } state = inName;
- int nAtts = 0;
- int open = 0; /* defined when state == inValue;
- initialization just to shut up compilers */
-
- for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define START_NAME \
- if (state == other) { \
- if (nAtts < attsMax) { \
- atts[nAtts].name = ptr; \
- atts[nAtts].normalized = 1; \
- } \
- state = inName; \
- }
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_NONASCII:
- case BT_NMSTRT:
- case BT_HEX:
- START_NAME
- break;
-#undef START_NAME
- case BT_QUOT:
- if (state != inValue) {
- if (nAtts < attsMax)
- atts[nAtts].valuePtr = ptr + MINBPC(enc);
- state = inValue;
- open = BT_QUOT;
- }
- else if (open == BT_QUOT) {
- state = other;
- if (nAtts < attsMax)
- atts[nAtts].valueEnd = ptr;
- nAtts++;
- }
- break;
- case BT_APOS:
- if (state != inValue) {
- if (nAtts < attsMax)
- atts[nAtts].valuePtr = ptr + MINBPC(enc);
- state = inValue;
- open = BT_APOS;
- }
- else if (open == BT_APOS) {
- state = other;
- if (nAtts < attsMax)
- atts[nAtts].valueEnd = ptr;
- nAtts++;
- }
- break;
- case BT_AMP:
- if (nAtts < attsMax)
- atts[nAtts].normalized = 0;
- break;
- case BT_S:
- if (state == inName)
- state = other;
- else if (state == inValue
- && nAtts < attsMax
- && atts[nAtts].normalized
- && (ptr == atts[nAtts].valuePtr
- || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
- || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
- || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
- atts[nAtts].normalized = 0;
- break;
- case BT_CR: case BT_LF:
- /* This case ensures that the first attribute name is counted
- Apart from that we could just change state on the quote. */
- if (state == inName)
- state = other;
- else if (state == inValue && nAtts < attsMax)
- atts[nAtts].normalized = 0;
- break;
- case BT_GT:
- case BT_SOL:
- if (state != inValue)
- return nAtts;
- break;
- default:
- break;
- }
- }
- /* not reached */
-}
-
-static int PTRFASTCALL
-PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
-{
- int result = 0;
- /* skip &# */
- ptr += 2*MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
- for (ptr += MINBPC(enc);
- !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
- ptr += MINBPC(enc)) {
- int c = BYTE_TO_ASCII(enc, ptr);
- switch (c) {
- case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
- case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
- result <<= 4;
- result |= (c - ASCII_0);
- break;
- case ASCII_A: case ASCII_B: case ASCII_C:
- case ASCII_D: case ASCII_E: case ASCII_F:
- result <<= 4;
- result += 10 + (c - ASCII_A);
- break;
- case ASCII_a: case ASCII_b: case ASCII_c:
- case ASCII_d: case ASCII_e: case ASCII_f:
- result <<= 4;
- result += 10 + (c - ASCII_a);
- break;
- }
- if (result >= 0x110000)
- return -1;
- }
- }
- else {
- for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
- int c = BYTE_TO_ASCII(enc, ptr);
- result *= 10;
- result += (c - ASCII_0);
- if (result >= 0x110000)
- return -1;
- }
- }
- return checkCharRefNumber(result);
-}
-
-static int PTRCALL
-PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
- const char *end)
-{
- switch ((end - ptr)/MINBPC(enc)) {
- case 2:
- if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
- switch (BYTE_TO_ASCII(enc, ptr)) {
- case ASCII_l:
- return ASCII_LT;
- case ASCII_g:
- return ASCII_GT;
- }
- }
- break;
- case 3:
- if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_p))
- return ASCII_AMP;
- }
- }
- break;
- case 4:
- switch (BYTE_TO_ASCII(enc, ptr)) {
- case ASCII_q:
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_t))
- return ASCII_QUOT;
- }
- }
- break;
- case ASCII_a:
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
- ptr += MINBPC(enc);
- if (CHAR_MATCHES(enc, ptr, ASCII_s))
- return ASCII_APOS;
- }
- }
- break;
- }
- }
- return 0;
-}
-
-static int PTRCALL
-PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
-{
- for (;;) {
- switch (BYTE_TYPE(enc, ptr1)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: \
- if (*ptr1++ != *ptr2++) \
- return 0;
- LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
-#undef LEAD_CASE
- /* fall through */
- if (*ptr1++ != *ptr2++)
- return 0;
- break;
- case BT_NONASCII:
- case BT_NMSTRT:
-#ifdef XML_NS
- case BT_COLON:
-#endif
- case BT_HEX:
- case BT_DIGIT:
- case BT_NAME:
- case BT_MINUS:
- if (*ptr2++ != *ptr1++)
- return 0;
- if (MINBPC(enc) > 1) {
- if (*ptr2++ != *ptr1++)
- return 0;
- if (MINBPC(enc) > 2) {
- if (*ptr2++ != *ptr1++)
- return 0;
- if (MINBPC(enc) > 3) {
- if (*ptr2++ != *ptr1++)
- return 0;
- }
- }
- }
- break;
- default:
- if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
- return 1;
- switch (BYTE_TYPE(enc, ptr2)) {
- case BT_LEAD2:
- case BT_LEAD3:
- case BT_LEAD4:
- case BT_NONASCII:
- case BT_NMSTRT:
-#ifdef XML_NS
- case BT_COLON:
-#endif
- case BT_HEX:
- case BT_DIGIT:
- case BT_NAME:
- case BT_MINUS:
- return 0;
- default:
- return 1;
- }
- }
- }
- /* not reached */
-}
-
-static int PTRCALL
-PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
- const char *end1, const char *ptr2)
-{
- for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
- if (ptr1 == end1)
- return 0;
- if (!CHAR_MATCHES(enc, ptr1, *ptr2))
- return 0;
- }
- return ptr1 == end1;
-}
-
-static int PTRFASTCALL
-PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
-{
- const char *start = ptr;
- for (;;) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: ptr += n; break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_NONASCII:
- case BT_NMSTRT:
-#ifdef XML_NS
- case BT_COLON:
-#endif
- case BT_HEX:
- case BT_DIGIT:
- case BT_NAME:
- case BT_MINUS:
- ptr += MINBPC(enc);
- break;
- default:
- return (int)(ptr - start);
- }
- }
-}
-
-static const char * PTRFASTCALL
-PREFIX(skipS)(const ENCODING *enc, const char *ptr)
-{
- for (;;) {
- switch (BYTE_TYPE(enc, ptr)) {
- case BT_LF:
- case BT_CR:
- case BT_S:
- ptr += MINBPC(enc);
- break;
- default:
- return ptr;
- }
- }
-}
-
-static void PTRCALL
-PREFIX(updatePosition)(const ENCODING *enc,
- const char *ptr,
- const char *end,
- POSITION *pos)
-{
- while (ptr < end) {
- switch (BYTE_TYPE(enc, ptr)) {
-#define LEAD_CASE(n) \
- case BT_LEAD ## n: \
- ptr += n; \
- break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
-#undef LEAD_CASE
- case BT_LF:
- pos->columnNumber = (XML_Size)-1;
- pos->lineNumber++;
- ptr += MINBPC(enc);
- break;
- case BT_CR:
- pos->lineNumber++;
- ptr += MINBPC(enc);
- if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
- ptr += MINBPC(enc);
- pos->columnNumber = (XML_Size)-1;
- break;
- default:
- ptr += MINBPC(enc);
- break;
- }
- pos->columnNumber++;
- }
-}
-
-#undef DO_LEAD_CASE
-#undef MULTIBYTE_CASES
-#undef INVALID_CASES
-#undef CHECK_NAME_CASE
-#undef CHECK_NAME_CASES
-#undef CHECK_NMSTRT_CASE
-#undef CHECK_NMSTRT_CASES
-
-#endif /* XML_TOK_IMPL_C */