summaryrefslogtreecommitdiffstats
path: root/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h')
-rw-r--r--chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h201
1 files changed, 201 insertions, 0 deletions
diff --git a/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
new file mode 100644
index 00000000000..79b070f12ea
--- /dev/null
+++ b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkPdfNativeTokenizer_DEFINED
+#define SkPdfNativeTokenizer_DEFINED
+
+#include <math.h>
+#include <string.h>
+
+#include "SkPdfConfig.h"
+#include "SkTDArray.h"
+#include "SkTDict.h"
+
+// All these constants are defined by the PDF 1.4 Spec.
+
+class SkPdfDictionary;
+class SkPdfImageDictionary;
+class SkPdfNativeDoc;
+class SkPdfNativeObject;
+
+
+// White-space characters (per the PDF 1.4 spec referenced at the top of this header).
+#define kNUL_PdfWhiteSpace '\x00'  // null
+#define kHT_PdfWhiteSpace '\x09'  // horizontal tab
+#define kLF_PdfWhiteSpace '\x0A'  // line feed
+#define kFF_PdfWhiteSpace '\x0C'  // form feed
+#define kCR_PdfWhiteSpace '\x0D'  // carriage return
+#define kSP_PdfWhiteSpace '\x20'  // space
+
+// Delimiter characters. "Inequity" brackets are the angle brackets '<' and '>'.
+#define kOpenedRoundBracket_PdfDelimiter '('
+#define kClosedRoundBracket_PdfDelimiter ')'
+#define kOpenedInequityBracket_PdfDelimiter '<'
+#define kClosedInequityBracket_PdfDelimiter '>'
+#define kOpenedSquareBracket_PdfDelimiter '['
+#define kClosedSquareBracket_PdfDelimiter ']'
+#define kOpenedCurlyBracket_PdfDelimiter '{'
+#define kClosedCurlyBracket_PdfDelimiter '}'
+#define kNamed_PdfDelimiter '/'
+#define kComment_PdfDelimiter '%'
+
+// Other special characters: the backslash and the backspace control character (0x08).
+// NOTE(review): presumably used when decoding escape sequences in strings - confirm at the
+// use sites, which are not in this header.
+#define kEscape_PdfSpecial '\\'
+#define kBackspace_PdfSpecial '\x08'
+
+// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
+// expressions? We should evaluate all the options; the answer might even differ from one
+// machine to another:
+// 1) expand the expression and let the compiler optimize it
+// 2) binary search
+// 3) linear search in an array
+// 4) lookup table (e.g. T type[256] ... return type[ch])
+// 5) manually build the expression with the fewest operators, e.g. for consecutive
+// chars we can use a binary compare that ignores the last bit
+//
+// NOTE: these are function-like macros and (ch) appears several times in each expansion, so
+// the argument may be evaluated more than once. Do not pass an expression with side
+// effects (e.g. *p++).
+//
+// True when ch equals one of the six k*_PdfWhiteSpace constants (NUL, HT, LF, FF, CR, SP).
+#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \
+ ((ch)==kHT_PdfWhiteSpace)|| \
+ ((ch)==kLF_PdfWhiteSpace)|| \
+ ((ch)==kFF_PdfWhiteSpace)|| \
+ ((ch)==kCR_PdfWhiteSpace)|| \
+ ((ch)==kSP_PdfWhiteSpace))
+
+// True when ch is an end-of-line character: a line feed or a carriage return.
+#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))
+
+
+// True when ch equals one of the ten k*_PdfDelimiter constants: ( ) < > [ ] { } / %
+// The argument appears several times in the expansion, so it may be evaluated more than
+// once; do not pass an expression with side effects.
+#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
+ ((ch)==kClosedRoundBracket_PdfDelimiter)||\
+ ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
+ ((ch)==kClosedInequityBracket_PdfDelimiter)||\
+ ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
+ ((ch)==kClosedSquareBracket_PdfDelimiter)||\
+ ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
+ ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
+ ((ch)==kNamed_PdfDelimiter)||\
+ ((ch)==kComment_PdfDelimiter))
+
+// True when ch is either a white-space character or a delimiter (same caveat about
+// multiple evaluation of the argument applies).
+#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))
+
+// True when ch is a decimal digit, '0' through '9'. The argument is evaluated up to twice.
+#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')
+// True when ch is a decimal digit, a sign ('+' or '-') or a decimal point ('.').
+// This classifies a single character only; it does not validate a whole number.
+// The argument may be evaluated more than once.
+#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')
+
+// Scanning helpers over the byte range delimited by a start pointer and an end pointer.
+// Both are defined outside this header.
+// NOTE(review): presumably skipPdfWhiteSpaces returns the first position at or after
+// buffer that is not PDF white space, and endOfPdfToken returns the position just past
+// the token beginning at start, both bounded by end - confirm against the implementation.
+const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end);
+const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end);
+
+// NOTE(review): not used anywhere in this header; presumably the number of objects in each
+// block created by SkPdfAllocator::allocBlock() - confirm in the implementation file.
+#define BUFFER_SIZE 1024
+
+/** \class SkPdfAllocator
+ *
+ *   An allocator that only hands out memory; everything it handed out is released together
+ *   when the allocator is destroyed. This strategy allows us not to do any garbage
+ *   collection while we parse and/or render a pdf.
+ *
+ *   bytesUsed() starts at sizeof(*this) and grows by the size of every successful alloc().
+ *   NOTE(review): allocObject(), allocBlock() and the destructor are defined outside this
+ *   header; whether they also adjust the byte count must be confirmed there.
+ *   NOTE(review): no copy constructor or assignment operator is declared, so the compiler
+ *   generates them; copying an allocator would duplicate its handle lists - confirm that no
+ *   caller copies instances.
+ */
+class SkPdfAllocator {
+public:
+    SkPdfAllocator() {
+        // Keep this order: the byte count is initialized before allocBlock() runs, in case
+        // allocBlock() (defined out of line) updates it.
+        fSizeInBytes = sizeof(*this);
+        fCurrent = allocBlock();
+        fCurrentUsed = 0;
+    }
+
+    ~SkPdfAllocator();
+
+    // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called.
+    SkPdfNativeObject* allocObject();
+
+    // Allocates a buffer of the given size with malloc(). It will be freed automatically
+    // when ~SkPdfAllocator() is called; callers must not free it themselves.
+    // Returns NULL if the allocation fails. A failed request is neither recorded in the
+    // handle list nor added to bytesUsed().
+    void* alloc(size_t bytes) {
+        void* data = malloc(bytes);
+        if (NULL == data) {
+            return NULL;
+        }
+        fHandles.push(data);
+        fSizeInBytes += bytes;
+        return data;
+    }
+
+    // Returns the number of bytes used in this allocator.
+    size_t bytesUsed() const {
+        return fSizeInBytes;
+    }
+
+private:
+    // NOTE(review): maintained out of line; presumably the object blocks that were filled
+    // before fCurrent - confirm in the implementation file.
+    SkTDArray<SkPdfNativeObject*> fHistory;
+    // Every buffer successfully returned by alloc().
+    SkTDArray<void*> fHandles;
+    // Block obtained from allocBlock().
+    SkPdfNativeObject* fCurrent;
+    // Starts at 0; presumably the number of objects already handed out from fCurrent.
+    int fCurrentUsed;
+
+    SkPdfNativeObject* allocBlock();
+    // Running total reported by bytesUsed().
+    size_t fSizeInBytes;
+};
+
+// Type of a parsed token; this is the type of PdfToken::fType.
+enum SkPdfTokenType {
+ // The token is a keyword.
+ kKeyword_TokenType,
+ // The token is an object.
+ kObject_TokenType,
+};
+
+
+/** \struct PdfToken
+ *
+ *   Stores the result of the parsing - a keyword or an object. fType says which one.
+ *
+ *   A default-constructed token has no keyword (NULL, length 0), no object (NULL) and type
+ *   kKeyword_TokenType, so every field has a defined value and the token can safely be
+ *   copied (e.g. passed by value) before the tokenizer fills it in.
+ *
+ *   NOTE(review): presumably fKeyword is not NUL-terminated and must be read together with
+ *   fKeywordLength - confirm in the tokenizer implementation.
+ */
+struct PdfToken {
+    const char* fKeyword;
+    size_t fKeywordLength;
+    SkPdfNativeObject* fObject;
+    SkPdfTokenType fType;
+
+    // Initializers are listed in member declaration order.
+    PdfToken()
+        : fKeyword(NULL)
+        , fKeywordLength(0)
+        , fObject(NULL)
+        , fType(kKeyword_TokenType) {}
+};
+
+/** \class SkPdfNativeTokenizer
+ *
+ * Responsible for tokenizing a stream into small tokens, each either a keyword or an object.
+ * A renderer can feed on the tokens and render a pdf.
+ *
+ * All member functions are defined outside this header.
+ *
+ */
+class SkPdfNativeTokenizer {
+public:
+ // Two ways to supply the input: an object that carries a stream, or a raw byte buffer
+ // of len bytes. Both also take the allocator and the document to work with.
+ // NOTE(review): the tokenizer stores raw pointers (see the private members), so
+ // presumably the allocator, the document and the buffer must outlive it - confirm.
+ SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
+ SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
+ SkPdfNativeTokenizer(const unsigned char* buffer, int len,
+ SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
+
+ virtual ~SkPdfNativeTokenizer();
+
+ // Reads one token. Returns false if there are no more tokens.
+ // If writeDiff is true, and a token was read, create a PNG highlighting
+ // the difference caused by this command in /tmp/log_step_by_step.
+ // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing.
+ bool readToken(PdfToken* token, bool writeDiff = false);
+
+ // Puts back a token so that it is returned by the next token read. Only one token may
+ // be put back at a time. It does not necessarily have to be the last token read.
+ void PutBack(PdfToken token);
+
+ // Reads the inline image that is present in the stream. At this point the ID token has
+ // already been consumed.
+ SkPdfImageDictionary* readInlineImage();
+
+private:
+ // NOTE(review): presumably the token reader that readToken() wraps - confirm in the
+ // implementation file.
+ bool readTokenCore(PdfToken* token);
+
+ SkPdfNativeDoc* fDoc;
+ SkPdfAllocator* fAllocator;
+
+ // NOTE(review): presumably the start of the uncompressed data, the current read
+ // position, and the end of the data, in that order - confirm in the implementation file.
+ const unsigned char* fUncompressedStreamStart;
+ const unsigned char* fUncompressedStream;
+ const unsigned char* fUncompressedStreamEnd;
+
+ // NOTE(review): presumably set when there is nothing to tokenize - confirm.
+ bool fEmpty;
+ // State for PutBack(): whether a token is pending, and the pending token itself.
+ bool fHasPutBack;
+ PdfToken fPutBack;
+};
+
+// Defined outside this header.
+// NOTE(review): presumably parses the next object found in the byte range delimited by
+// start and end into token, using allocator for any memory it needs, and returns the
+// position where parsing stopped - confirm against the implementation.
+const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
+ SkPdfNativeObject* token,
+ SkPdfAllocator* allocator,
+ SkPdfNativeDoc* doc);
+
+#endif // SkPdfNativeTokenizer_DEFINED