diff options
Diffstat (limited to 'chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h')
-rw-r--r-- | chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h new file mode 100644 index 00000000000..79b070f12ea --- /dev/null +++ b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h @@ -0,0 +1,201 @@ +/* + * Copyright 2013 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkPdfNativeTokenizer_DEFINED +#define SkPdfNativeTokenizer_DEFINED + +#include <math.h> +#include <string.h> + +#include "SkPdfConfig.h" +#include "SkTDArray.h" +#include "SkTDict.h" + +// All these constants are defined by the PDF 1.4 Spec. + +class SkPdfDictionary; +class SkPdfImageDictionary; +class SkPdfNativeDoc; +class SkPdfNativeObject; + + +// White Spaces +#define kNUL_PdfWhiteSpace '\x00' +#define kHT_PdfWhiteSpace '\x09' +#define kLF_PdfWhiteSpace '\x0A' +#define kFF_PdfWhiteSpace '\x0C' +#define kCR_PdfWhiteSpace '\x0D' +#define kSP_PdfWhiteSpace '\x20' + +// PdfDelimiters +#define kOpenedRoundBracket_PdfDelimiter '(' +#define kClosedRoundBracket_PdfDelimiter ')' +#define kOpenedInequityBracket_PdfDelimiter '<' +#define kClosedInequityBracket_PdfDelimiter '>' +#define kOpenedSquareBracket_PdfDelimiter '[' +#define kClosedSquareBracket_PdfDelimiter ']' +#define kOpenedCurlyBracket_PdfDelimiter '{' +#define kClosedCurlyBracket_PdfDelimiter '}' +#define kNamed_PdfDelimiter '/' +#define kComment_PdfDelimiter '%' + +#define kEscape_PdfSpecial '\\' +#define kBackspace_PdfSpecial '\x08' + +// TODO(edisonn): what is the fastest way for compiler/machine type to evaluate these expressions? +// we should evaluate all options. might be even different from one machine to another +// 1) expand expression, let compiler optimize it +// 2) binary search +// 3) linear search in array +// 4) vector (e.g. T type[256] .. return type[ch] ... 
+// 5) manually build the expression with least number of operators, e.g. for consecutive +// chars, we can use an binary equal ignoring last bit +#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \ + ((ch)==kHT_PdfWhiteSpace)|| \ + ((ch)==kLF_PdfWhiteSpace)|| \ + ((ch)==kFF_PdfWhiteSpace)|| \ + ((ch)==kCR_PdfWhiteSpace)|| \ + ((ch)==kSP_PdfWhiteSpace)) + +#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace)) + + +#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\ + ((ch)==kClosedRoundBracket_PdfDelimiter)||\ + ((ch)==kOpenedInequityBracket_PdfDelimiter)||\ + ((ch)==kClosedInequityBracket_PdfDelimiter)||\ + ((ch)==kOpenedSquareBracket_PdfDelimiter)||\ + ((ch)==kClosedSquareBracket_PdfDelimiter)||\ + ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\ + ((ch)==kClosedCurlyBracket_PdfDelimiter)||\ + ((ch)==kNamed_PdfDelimiter)||\ + ((ch)==kComment_PdfDelimiter)) + +#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch)) + +#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9') +#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.') + +const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end); +const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end); + +#define BUFFER_SIZE 1024 + +/** \class SkPdfAllocator + * + * An allocator only allocates memory, and it deletes it all when the allocator is destroyed. + * This strategy would allow us not to do any garbage collection while we parse and/or render + * a pdf. + * + */ +class SkPdfAllocator { +public: + SkPdfAllocator() { + fSizeInBytes = sizeof(*this); + fCurrent = allocBlock(); + fCurrentUsed = 0; + } + + ~SkPdfAllocator(); + + // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called. + SkPdfNativeObject* allocObject(); + + // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called. 
+ void* alloc(size_t bytes) { + void* data = malloc(bytes); + fHandles.push(data); + fSizeInBytes += bytes; + return data; + } + + // Returns the number of bytes used in this allocator. + size_t bytesUsed() const { + return fSizeInBytes; + } + +private: + SkTDArray<SkPdfNativeObject*> fHistory; + SkTDArray<void*> fHandles; + SkPdfNativeObject* fCurrent; + int fCurrentUsed; + + SkPdfNativeObject* allocBlock(); + size_t fSizeInBytes; +}; + +// Type of a parsed token. +enum SkPdfTokenType { + kKeyword_TokenType, + kObject_TokenType, +}; + + +/** \struct PdfToken + * + * Stores the result of the parsing - a keyword or an object. + * + */ +struct PdfToken { + const char* fKeyword; + size_t fKeywordLength; + SkPdfNativeObject* fObject; + SkPdfTokenType fType; + + PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} +}; + +/** \class SkPdfNativeTokenizer + * + * Responsible to tokenize a stream in small tokens, eityh a keyword or an object. + * A renderer can feed on the tokens and render a pdf. + * + */ +class SkPdfNativeTokenizer { +public: + SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, + SkPdfAllocator* allocator, SkPdfNativeDoc* doc); + SkPdfNativeTokenizer(const unsigned char* buffer, int len, + SkPdfAllocator* allocator, SkPdfNativeDoc* doc); + + virtual ~SkPdfNativeTokenizer(); + + // Reads one token. Returns false if there are no more tokens. + // If writeDiff is true, and a token was read, create a PNG highlighting + // the difference caused by this command in /tmp/log_step_by_step. + // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing. + bool readToken(PdfToken* token, bool writeDiff = false); + + // Put back a token to be read in the nextToken read. Only one token is allowed to be put + // back. Must not necesaarely be the last token read. + void PutBack(PdfToken token); + + // Reads the inline image that is present in the stream. At this point we just consumed the ID + // token already. 
+ SkPdfImageDictionary* readInlineImage(); + +private: + bool readTokenCore(PdfToken* token); + + SkPdfNativeDoc* fDoc; + SkPdfAllocator* fAllocator; + + const unsigned char* fUncompressedStreamStart; + const unsigned char* fUncompressedStream; + const unsigned char* fUncompressedStreamEnd; + + bool fEmpty; + bool fHasPutBack; + PdfToken fPutBack; +}; + +const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, + SkPdfNativeObject* token, + SkPdfAllocator* allocator, + SkPdfNativeDoc* doc); + +#endif // SkPdfNativeTokenizer_DEFINED |