summaryrefslogtreecommitdiffstats
path: root/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp')
-rw-r--r--chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp582
1 files changed, 582 insertions, 0 deletions
diff --git a/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp
new file mode 100644
index 00000000000..2f0c18cb60a
--- /dev/null
+++ b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkPdfNativeDoc.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "SkPdfMapper_autogen.h"
+#include "SkPdfNativeObject.h"
+#include "SkPdfNativeTokenizer.h"
+#include "SkPdfReporter.h"
+#include "SkStream.h"
+
+// TODO(edisonn): for some reason on mac these files are found here, but are found from headers
+//#include "SkPdfFileTrailerDictionary_autogen.h"
+//#include "SkPdfCatalogDictionary_autogen.h"
+//#include "SkPdfPageObjectDictionary_autogen.h"
+//#include "SkPdfPageTreeNodeDictionary_autogen.h"
+#include "SkPdfHeaders_autogen.h"
+
+// Returns the size in bytes that stat() reports for |filename|, or -1 when
+// stat() fails.
+static long getFileSize(const char* filename)
+{
+    struct stat fileInfo;
+    if (stat(filename, &fileInfo) != 0) {
+        return -1;
+    }
+    return (long)fileInfo.st_size;
+}
+
+// Walks backwards from |current| and returns the first byte of the line that
+// contains it. The scan stops at |start|, or right after the closest preceding
+// byte for which isPdfEOL() is true.
+static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
+    const unsigned char* cursor = current;
+    for (; cursor > start; --cursor) {
+        if (isPdfEOL(cursor[-1])) {
+            break;
+        }
+    }
+    return cursor;
+}
+
+// Given |current| at the home of a line, returns the home of the line before
+// it, never moving before |start|.
+static const unsigned char* previousLineHome(const unsigned char* start,
+                                             const unsigned char* current) {
+    const unsigned char* cursor = current;
+
+    // Step back over the EOL byte that ends the previous line, if there is one.
+    if (cursor > start && isPdfEOL(cursor[-1])) {
+        --cursor;
+    }
+
+    // A second EOL byte is part of the same line break only when it differs
+    // from the first: CR+LF and LF+CR are one break, CR+CR and LF+LF are two.
+    if (cursor > start && isPdfEOL(cursor[-1]) && cursor[0] != cursor[-1]) {
+        --cursor;
+    }
+
+    // What remains is the same backwards scan lineHome() performs.
+    return lineHome(start, cursor);
+}
+
+// Skips the rest of the line that |current| is on and returns a pointer to the
+// first byte of the next line. A two byte line break made of two different
+// EOL bytes (CR+LF or LF+CR) is consumed as one break. When the buffer ends
+// before any EOL byte is found the scan position is returned unchanged, so the
+// result never moves past |end|.
+static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
+    while (current < end && !isPdfEOL(*current)) {
+        current++;
+    }
+    if (current >= end) {
+        // No EOL left to consume. Stepping over one anyway would hand callers
+        // a pointer beyond |end|.
+        return current;
+    }
+    current++;
+    if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
+        current++;
+    }
+    return current;
+}
+
+SkPdfNativeDoc* gDoc = NULL;  // Assigned |this| by the path-based SkPdfNativeDoc constructor.
+
+// Builds a document from the complete contents of |stream|. The bytes are
+// copied into a buffer that this object owns and releases in its destructor.
+// When the stream is empty or cannot be read in full, the failure is reported
+// and the document is left with 0 pages.
+SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
+        : fAllocator(new SkPdfAllocator())
+        , fFileContent(NULL)
+        , fContentLength(0)
+        , fRootCatalogRef(NULL)
+        , fRootCatalog(NULL) {
+    size_t size = stream->getLength();
+    void* ptr = sk_malloc_throw(size);
+
+    // A short read would leave the tail of the buffer uninitialized, and an
+    // empty buffer has no last line for init() to start from.
+    if (size == 0 || stream->read(ptr, size) != size) {
+        sk_free(ptr);
+        SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                    "could not read stream", NULL, NULL);
+        // TODO(edisonn): not nice to return like this from constructor, create a static
+        // function that can report NULL for failures.
+        return;  // Doc will have 0 pages
+    }
+
+    init(ptr, size);
+}
+
+// Builds a document from the file at |path| and records this object in gDoc.
+// The whole file is read into a buffer that this object owns and releases in
+// its destructor. When the file cannot be opened the document silently stays
+// empty; when its size cannot be determined, it is empty, or it cannot be
+// read, the failure is reported and the document is left with 0 pages.
+SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
+        : fAllocator(new SkPdfAllocator())
+        , fFileContent(NULL)
+        , fContentLength(0)
+        , fRootCatalogRef(NULL)
+        , fRootCatalog(NULL) {
+    gDoc = this;
+    // Binary mode: a PDF is not text, and text mode may translate line ends or
+    // stop at an EOF marker byte on some platforms.
+    FILE* file = fopen(path, "rb");
+    // TODO(edisonn): put this in a function that can return NULL
+    if (file) {
+        // getFileSize() returns -1 on failure; that must not be turned into a
+        // huge size_t allocation request.
+        long fileSize = getFileSize(path);
+        if (fileSize <= 0) {
+            fclose(file);
+            SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                        "could not read file", NULL, NULL);
+            return;  // Doc will have 0 pages
+        }
+
+        size_t size = (size_t)fileSize;
+        void* content = sk_malloc_throw(size);
+        bool ok = (0 != fread(content, size, 1, file));
+        fclose(file);
+        if (!ok) {
+            sk_free(content);
+            SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                        "could not read file", NULL, NULL);
+            // TODO(edisonn): not nice to return like this from constructor, create a static
+            // function that can report NULL for failures.
+            return;  // Doc will have 0 pages
+        }
+
+        init(content, size);
+    }
+}
+
+// Parses the unsigned decimal number that starts at |line|, after optional
+// spaces and tabs, without reading at or past |end|. Returns -1 when there are
+// no digits or when the value is not smaller than |limit|.
+static long parseXRefByteOffset(const unsigned char* line, const unsigned char* end,
+                                size_t limit) {
+    while (line < end && (*line == ' ' || *line == '\t')) {
+        line++;
+    }
+
+    size_t value = 0;
+    bool hasDigits = false;
+    for (; line < end && *line >= '0' && *line <= '9'; line++) {
+        if (value > limit / 10) {
+            return -1;  // The next digit would push the value past |limit|.
+        }
+        value = value * 10 + (size_t)(*line - '0');
+        if (value >= limit) {
+            return -1;
+        }
+        hasDigits = true;
+    }
+
+    long result = (long)value;
+    return (hasDigits && result >= 0) ? result : -1;
+}
+
+// Takes ownership of the |length| bytes at |bytes| (released with sk_free() in
+// the destructor) and loads the document from them.
+//
+// The last three lines of the buffer are expected to be the "startxref"
+// keyword, the byte offset of the last cross reference section, and the end of
+// file marker. Starting from that offset, every cross reference section and
+// its trailer are read, following the trailers' Prev offsets. Only the first
+// trailer read may set the root catalog reference. When no pages are found
+// this way, the file is scanned sequentially with loadWithoutXRef().
+void SkPdfNativeDoc::init(const void* bytes, size_t length) {
+    fFileContent = (const unsigned char*)bytes;
+    fContentLength = length;
+
+    const unsigned char* xrefstartKeywordLine = fFileContent;
+    long xrefByteOffset = -1;
+
+    // An empty buffer has no last line to start from.
+    if (fFileContent != NULL && fContentLength > 0) {
+        const unsigned char* fileEnd = fFileContent + fContentLength;
+        const unsigned char* eofLine = lineHome(fFileContent, fileEnd - 1);
+        const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
+        xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
+
+        // The buffer is not NUL terminated and the keyword is followed by more
+        // content, so compare exactly the keyword's bytes, and only when that
+        // many bytes are left.
+        static const char kStartXRef[] = "startxref";
+        const size_t startXRefLength = sizeof(kStartXRef) - 1;
+        if ((size_t)(fileEnd - xrefstartKeywordLine) < startXRefLength ||
+                strncmp((const char*)xrefstartKeywordLine, kStartXRef, startXRefLength) != 0) {
+            SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
+                        "Could not find startxref", NULL, NULL);
+        }
+
+        xrefByteOffset = parseXRefByteOffset(xrefByteOffsetLine, fileEnd, fContentLength);
+    }
+
+    bool storeCatalog = true;
+    while (xrefByteOffset >= 0) {
+        // Offsets come from the file (here or from a trailer's Prev entry), so
+        // verify them before turning them into pointers.
+        if (xrefByteOffset >= (long)(xrefstartKeywordLine - fFileContent)) {
+            SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                        "xref offset out of bounds", NULL, NULL);
+            break;
+        }
+
+        const unsigned char* trailerStart =
+                this->readCrossReferenceSection(fFileContent + xrefByteOffset,
+                                                xrefstartKeywordLine);
+        xrefByteOffset = -1;
+        if (trailerStart < xrefstartKeywordLine) {
+            this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset,
+                              false);
+            storeCatalog = false;
+        }
+    }
+
+    // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
+    // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
+
+    if (fRootCatalogRef) {
+        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
+        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
+            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
+            if (tree && tree->isDictionary() && tree->valid()) {
+                fillPages(tree);
+            }
+        }
+    }
+
+    if (pages() == 0) {
+        // TODO(edisonn): probably it would be better to return NULL and make a clean document.
+        loadWithoutXRef();
+    }
+
+    // TODO(edisonn): corrupted pdf, read it from beginning and rebuild
+    // (xref, trailer, or just read all objects)
+}
+
+// Fallback loader used when the cross reference data yielded no pages. Skips
+// the first line of the file, then reads tokens sequentially:
+//   - "<id> <generation> obj <value>" stores <value> as object <id>, already
+//     marked as resolved (the generation is ignored);
+//   - "trailer" is handed to readTrailer();
+//   - "startxref" and the token after it are skipped.
+// Afterwards the root catalog is located (from a trailer, or from the last
+// dictionary that has a Root reference) and the page list is filled from it.
+void SkPdfNativeDoc::loadWithoutXRef() {
+    const unsigned char* current = fFileContent;
+    const unsigned char* end = fFileContent + fContentLength;
+
+    // TODO(edisonn): read pdf version
+    current = ignoreLine(current, end);
+
+    current = skipPdfWhiteSpaces(current, end);
+    while (current < end) {
+        SkPdfNativeObject token;
+        current = nextObject(current, end, &token, NULL, NULL);
+        if (token.isInteger()) {
+            int id = (int)token.intValue();
+
+            token.reset();
+            current = nextObject(current, end, &token, NULL, NULL);
+            // TODO(edisonn): generation ignored for now (used in pdfs with updates)
+            // int generation = (int)token.intValue();
+
+            token.reset();
+            current = nextObject(current, end, &token, NULL, NULL);
+            // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring.
+            if (!token.isKeyword("obj")) {
+                SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
+                            "Could not find obj", NULL, NULL);
+                continue;
+            }
+
+            if (id < 0) {
+                // The id comes straight from the file; a negative one would
+                // index fObjects out of bounds. Read past the object's value
+                // and drop it.
+                SkPdfReport(kWarning_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                            "Negative object id", NULL, NULL);
+                token.reset();
+                current = nextObject(current, end, &token, NULL, NULL);
+            } else {
+                // TODO(edisonn): security, an upper bound for id is still missing.
+                // "count() <= id" instead of "count() < id + 1": no overflow
+                // when id is the largest int.
+                while (fObjects.count() <= id) {
+                    reset(fObjects.append());
+                }
+
+                fObjects[id].fOffset = current - fFileContent;
+
+                SkPdfNativeObject* obj = fAllocator->allocObject();
+                current = nextObject(current, end, obj, fAllocator, this);
+
+                fObjects[id].fResolvedReference = obj;
+                fObjects[id].fObj = obj;
+                fObjects[id].fIsReferenceResolved = true;
+            }
+        } else if (token.isKeyword("trailer")) {
+            long dummy;
+            current = readTrailer(current, end, true, &dummy, true);
+        } else if (token.isKeyword("startxref")) {
+            token.reset();
+            current = nextObject(current, end, &token, NULL, NULL);  // ignore startxref
+        }
+
+        current = skipPdfWhiteSpaces(current, end);
+    }
+
+    // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we
+    // might not need it.
+    if (!fRootCatalogRef) {
+        for (unsigned int i = 0 ; i < objects(); i++) {
+            SkPdfNativeObject* obj = object(i);
+            SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
+            if (root && root->isReference()) {
+                fRootCatalogRef = root;
+            }
+        }
+    }
+
+    if (fRootCatalogRef) {
+        fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
+        if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
+            SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
+            if (tree && tree->isDictionary() && tree->valid()) {
+                fillPages(tree);
+            }
+        }
+    }
+}
+
+// Releases the file buffer handed to init() and then the object allocator.
+SkPdfNativeDoc::~SkPdfNativeDoc() {
+    void* ownedContent = const_cast<unsigned char*>(fFileContent);
+    sk_free(ownedContent);
+
+    delete fAllocator;
+}
+
+// Reads one cross reference section that starts at |xrefStart| and does not
+// extend beyond |trailerEnd|. After the "xref" keyword the section consists of
+// subsections: "<first id> <entry count>" followed by that many entries of the
+// form "<offset> <generation> <f|n>". Each entry is recorded with
+// addCrossSectionInfo().
+//
+// Returns the position of the first token that does not start a subsection
+// (where the trailer is expected), the position reached when an entry is
+// malformed, or |trailerEnd| when the "xref" keyword is missing.
+const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart,
+                                                               const unsigned char* trailerEnd) {
+    SkPdfNativeObject xref;
+    const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL);
+
+    if (!xref.isKeyword("xref")) {
+        SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find xref",
+                    NULL, NULL);
+        return trailerEnd;
+    }
+
+    SkPdfNativeObject token;
+    while (current < trailerEnd) {
+        token.reset();
+        const unsigned char* previous = current;
+        current = nextObject(current, trailerEnd, &token, NULL, NULL);
+        if (!token.isInteger()) {
+            // Not a subsection header: give the token back to the caller.
+            SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
+                        "Done readCrossReferenceSection", NULL, NULL);
+            return previous;
+        }
+
+        int startId = (int)token.intValue();
+        if (startId < 0) {
+            // Ids are used as indices into fObjects; never let a negative one
+            // from the file get that far.
+            SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                        "readCrossReferenceSection: negative object id", NULL, NULL);
+            return current;
+        }
+
+        token.reset();
+        current = nextObject(current, trailerEnd, &token, NULL, NULL);
+
+        if (!token.isInteger()) {
+            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection",
+                                      &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
+            return current;
+        }
+
+        int entries = (int)token.intValue();
+
+        for (int i = 0; i < entries; i++) {
+            token.reset();
+            current = nextObject(current, trailerEnd, &token, NULL, NULL);
+            if (!token.isInteger()) {
+                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                          "readCrossReferenceSection",
+                                          &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
+                return current;
+            }
+            int offset = (int)token.intValue();
+
+            token.reset();
+            current = nextObject(current, trailerEnd, &token, NULL, NULL);
+            if (!token.isInteger()) {
+                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                          "readCrossReferenceSection",
+                                          &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
+                return current;
+            }
+            int generation = (int)token.intValue();
+
+            token.reset();
+            current = nextObject(current, trailerEnd, &token, NULL, NULL);
+            if (!token.isKeyword() || token.lenstr() != 1 ||
+                    (*token.c_str() != 'f' && *token.c_str() != 'n')) {
+                SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                          "readCrossReferenceSection: f or n expected",
+                                          &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
+                return current;
+            }
+
+            this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
+        }
+    }
+    SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
+                "Unexpected end of readCrossReferenceSection", NULL, NULL);
+    return current;
+}
+
+// Reads the trailer dictionary found between |trailerStart| and |trailerEnd|.
+//   skipKeyword  - true when the caller already consumed the "trailer" keyword;
+//                  otherwise the keyword is read and verified here first.
+//   storeCatalog - when true, the trailer's Root entry must be a reference and
+//                  is stored in fRootCatalogRef.
+//   prev         - receives the trailer's Prev value, or -1 when the trailer
+//                  has none or reading stopped before reaching it.
+// Returns the position after the last token consumed.
+const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart,
+                                                 const unsigned char* trailerEnd,
+                                                 bool storeCatalog, long* prev, bool skipKeyword) {
+    *prev = -1;
+
+    const unsigned char* cursor = trailerStart;
+    if (!skipKeyword) {
+        SkPdfNativeObject keyword;
+        // Use null allocator, and let it just fail if memory, it should not crash.
+        cursor = nextObject(cursor, trailerEnd, &keyword, NULL, NULL);
+
+        const bool isTrailerKeyword = keyword.isKeyword() &&
+                                      strlen("trailer") == keyword.lenstr() &&
+                                      strncmp(keyword.c_str(), "trailer", strlen("trailer")) == 0;
+        if (!isTrailerKeyword) {
+            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                      "readTrailer: trailer keyword expected",
+                                      &keyword,
+                                      SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
+            return cursor;
+        }
+    }
+
+    SkPdfNativeObject dictionary;
+    cursor = nextObject(cursor, trailerEnd, &dictionary, fAllocator, NULL);
+    SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&dictionary;
+    // valid() is only consulted once the token is known to be a dictionary.
+    if (!dictionary.isDictionary() || !trailer->valid()) {
+        return cursor;
+    }
+
+    if (storeCatalog) {
+        SkPdfNativeObject* root = trailer->Root(NULL);
+        const bool isRootReference = (root != NULL) && root->isReference();
+        if (!isRootReference) {
+            SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                      "readTrailer: unexpected root reference",
+                                      root, SkPdfNativeObject::kReference_PdfObjectType, NULL);
+            return cursor;
+        }
+        fRootCatalogRef = root;
+    }
+
+    if (trailer->has_Prev()) {
+        *prev = (long)trailer->Prev(NULL);
+    }
+
+    return cursor;
+}
+
+// Records the cross reference entry of object |id|: fObjects grows as needed
+// to hold it, |offset| is stored, and any previously read or resolved object
+// for that id is forgotten. |generation| and |isFreed| are not used yet.
+// Negative ids are reported and ignored.
+void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
+    // The id comes from the file; a negative one would write before the start
+    // of fObjects.
+    if (id < 0) {
+        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                    "cross reference id out of bounds", NULL, NULL);
+        return;
+    }
+
+    // TODO(edisonn): security here, verify id (an upper bound is still missing)
+    // "count() <= id" instead of "count() < id + 1": no overflow when id is the
+    // largest int.
+    while (fObjects.count() <= id) {
+        this->reset(fObjects.append());
+    }
+
+    fObjects[id].fOffset = offset;
+    fObjects[id].fObj = NULL;
+    fObjects[id].fResolvedReference = NULL;
+    fObjects[id].fIsReferenceResolved = false;
+}
+
+// Parses the indirect object |id| ("<id> <generation> obj <value>") at the
+// file offset stored in fObjects[id] and returns its value, allocated from
+// fAllocator. Returns NULL when the stored offset lies outside the file or
+// when the file ends before the value starts. A mismatching id, a non-integer
+// generation or a missing "obj" keyword is reported but does not stop the
+// read. The caller must pass an |id| that is a valid index into fObjects.
+SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
+    long startOffset = fObjects[id].fOffset;
+    //long endOffset = fObjects[id].fOffsetEnd;
+    // TODO(edisonn): use hinted endOffset
+
+    // The offset may have been copied verbatim from the file's cross reference
+    // section, so check it before building a pointer from it.
+    if (startOffset < 0 || (size_t)startOffset >= fContentLength) {
+        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                    "object offset out of bounds", NULL, NULL);
+        return NULL;
+    }
+
+    const unsigned char* current = fFileContent + startOffset;
+    const unsigned char* end = fFileContent + fContentLength;
+
+    // NOTE(review): this tokenizer is never used below; confirm its
+    // constructor has no needed side effect before removing it.
+    SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this);
+
+    SkPdfNativeObject idObj;
+    SkPdfNativeObject generationObj;
+    SkPdfNativeObject objKeyword;
+
+    current = nextObject(current, end, &idObj, NULL, NULL);
+    if (current >= end) {
+        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id",
+                    NULL, NULL);
+        return NULL;
+    }
+
+    current = nextObject(current, end, &generationObj, NULL, NULL);
+    if (current >= end) {
+        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                    "reading generation", NULL, NULL);
+        return NULL;
+    }
+
+    current = nextObject(current, end, &objKeyword, NULL, NULL);
+    if (current >= end) {
+        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                    "reading keyword obj", NULL, NULL);
+        return NULL;
+    }
+
+    if (!idObj.isInteger() || id != idObj.intValue()) {
+        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id",
+                                  &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
+    }
+
+    // TODO(edisonn): verify that the generation is the right one
+    if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) {
+        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                  "readObject: unexpected generation",
+                                  &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
+    }
+
+    if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
+        SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
+                                  "readObject: unexpected obj keyword",
+                                  &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
+    }
+
+    // Allocate the result only now, so the early returns above do not consume
+    // an allocator slot for nothing.
+    SkPdfNativeObject* dict = fAllocator->allocObject();
+    current = nextObject(current, end, dict, fAllocator, this);
+
+    // TODO(edisonn): report warning/error - verify that the last token is endobj
+
+    return dict;
+}
+
+// Appends the pages found under |tree| to fPages, in the order of the Kids
+// arrays. A node without a Kids array is itself stored as a page. Kids that
+// cannot be resolved are reported and skipped.
+void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
+    SkPdfArray* kids = tree->Kids(this);
+    if (kids == NULL) {
+        *fPages.append() = (SkPdfPageObjectDictionary*)tree;
+        return;
+    }
+
+    int cnt = (int) kids->size();
+    for (int i = 0; i < cnt; i++) {
+        SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
+        if (obj == NULL) {
+            // A NULL entry in fPages would be dereferenced later by
+            // pageResources() and MediaBox().
+            SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kBadReference_SkPdfIssue,
+                        "fillPages: could not resolve kid", NULL, NULL);
+            continue;
+        }
+
+        // NOTE(review): this stores the kid as a page when the mapper does NOT
+        // report a page object and recurses when it does, which looks
+        // inverted. Confirm against SkPdfMapper before changing it.
+        if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
+            *fPages.append() = (SkPdfPageObjectDictionary*)obj;
+        } else {
+            // TODO(edisonn): verify that it is a page tree indeed
+            fillPages((SkPdfPageTreeNodeDictionary*)obj);
+        }
+    }
+}
+
+// Returns how many pages have been collected in fPages.
+int SkPdfNativeDoc::pages() const {
+    const int pageCount = fPages.count();
+    return pageCount;
+}
+
+// Returns the dictionary of the zero-based page |page|, or NULL when |page| is
+// out of range (debug builds assert first).
+SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
+    SkASSERT(page >= 0 && page < fPages.count());
+
+    // Keep release builds, where the assert is compiled out, from reading
+    // outside fPages; object() guards its index the same way.
+    if (page < 0 || page >= fPages.count()) {
+        return NULL;
+    }
+
+    return fPages[page];
+}
+
+
+// Returns the Resources dictionary of the zero-based page |page|, or NULL when
+// |page| is out of range or the page entry is NULL (debug builds assert on the
+// range first).
+SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
+    SkASSERT(page >= 0 && page < fPages.count());
+
+    // Keep release builds, where the assert is compiled out, from reading
+    // outside fPages or calling through a NULL page.
+    if (page < 0 || page >= fPages.count() || fPages[page] == NULL) {
+        return NULL;
+    }
+
+    return fPages[page]->Resources(this);
+}
+
+// TODO(edisonn): Partial implemented.
+// Move the logics directly in the code generator for inheritable and default values?
+//
+// Returns the media box of the zero-based page |page|. When the page itself
+// has no MediaBox entry, its Parent chain is followed until a node with a
+// MediaBox, or without a Parent, is reached. Returns an empty rectangle when
+// |page| is out of range or the chain ends in NULL.
+SkRect SkPdfNativeDoc::MediaBox(int page) {
+    if (page < 0 || page >= fPages.count()) {
+        return SkRect::MakeEmpty();
+    }
+
+    SkPdfPageObjectDictionary* current = fPages[page];
+    // Parent() may return NULL, so test |current| before every dereference and
+    // not only after the loop.
+    while (current && !current->has_MediaBox() && current->has_Parent()) {
+        current = (SkPdfPageObjectDictionary*)current->Parent(this);
+    }
+    if (current) {
+        return current->MediaBox(this);
+    }
+    return SkRect::MakeEmpty();
+}
+
+// Returns the number of entries in fObjects.
+size_t SkPdfNativeDoc::objects() const {
+    const size_t objectCount = fObjects.count();
+    return objectCount;
+}
+
+// Returns object number |i|, reading it from the file with readObject() the
+// first time it is requested. Returns NULL when |i| is not a valid index into
+// fObjects (debug builds assert first) or when the object cannot be read.
+SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
+    SkASSERT(i >= 0 && i < fObjects.count());
+
+    // Valid indices are 0 .. count() - 1. The index count() itself must be
+    // rejected as well.
+    if (i < 0 || i >= fObjects.count()) {
+        return NULL;
+    }
+
+    if (fObjects[i].fObj == NULL) {
+        fObjects[i].fObj = readObject(i);
+        // TODO(edisonn): For perf, when we read the cross reference sections, we should take
+        // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad
+        // stream, and fail quickly, in case we default to sequential stream read.
+    }
+
+    return fObjects[i].fObj;
+}
+
+// Returns the mapper stored in fMapper.
+const SkPdfMapper* SkPdfNativeDoc::mapper() const {
+    const SkPdfMapper* const result = fMapper;
+    return result;
+}
+
+// Allocates an object from fAllocator, turns it into a real holding |value|
+// and returns it.
+SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
+    SkPdfNativeObject* real = fAllocator->allocObject();
+
+    SkPdfNativeObject::makeReal(value, real);
+    TRACK_OBJECT_SRC(real);
+
+    return (SkPdfReal*)real;
+}
+
+// Allocates an object from fAllocator, turns it into an integer holding
+// |value| and returns it.
+SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
+    SkPdfNativeObject* integer = fAllocator->allocObject();
+
+    SkPdfNativeObject::makeInteger(value, integer);
+    TRACK_OBJECT_SRC(integer);
+
+    return (SkPdfInteger*)integer;
+}
+
+// Allocates an object from fAllocator, turns it into a string built from the
+// |len| bytes at |sz| and returns it.
+SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
+    SkPdfNativeObject* string = fAllocator->allocObject();
+
+    SkPdfNativeObject::makeString(sz, len, string);
+    TRACK_OBJECT_SRC(string);
+
+    return (SkPdfString*)string;
+}
+
+// Returns the allocator created in the constructor and deleted in the
+// destructor.
+SkPdfAllocator* SkPdfNativeDoc::allocator() const {
+    SkPdfAllocator* const result = fAllocator;
+    return result;
+}
+
+// Follows |ref| to the object it refers to. Anything that is not a reference
+// (NULL included) is returned unchanged. For a reference, the target is read
+// on first use, chains of references are followed, and the result is cached
+// in fObjects. Returns NULL when the id is out of range or the target cannot
+// be resolved.
+SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
+    if (ref == NULL || !ref->isReference()) {
+        return (SkPdfNativeObject*)ref;
+    }
+
+    int id = ref->referenceId();
+    // TODO(edisonn): generation/updates not supported now
+    //int gen = ref->referenceGeneration();
+
+    // TODO(edisonn): verify id and gen expected
+    const bool idInRange = (id >= 0) && (id < fObjects.count());
+    if (!idInRange) {
+        SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
+                    "resolve reference id out of bounds", NULL, NULL);
+        return NULL;
+    }
+
+    // Already resolved, or being resolved further up the call stack.
+    if (fObjects[id].fIsReferenceResolved) {
+        SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity,
+                      kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL);
+        return fObjects[id].fResolvedReference;
+    }
+
+    // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow
+    // here unless we check for resolved reference on next line.
+    // Determine if the pdf is corrupted, or we have a bug here.
+
+    // Avoids recursive calls: the flag is raised before anything is read, so
+    // a nested request for the same id takes the branch above.
+    fObjects[id].fIsReferenceResolved = true;
+
+    if (fObjects[id].fObj == NULL) {
+        fObjects[id].fObj = readObject(id);
+    }
+
+    if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) {
+        fObjects[id].fResolvedReference = fObjects[id].fObj->isReference()
+                                                  ? resolveReference(fObjects[id].fObj)
+                                                  : fObjects[id].fObj;
+    }
+
+    return fObjects[id].fResolvedReference;
+}
+
+// Returns the sum of: the allocator's bytesUsed(), the length of the file
+// buffer, the entries of fObjects and fPages, and the size of this object.
+size_t SkPdfNativeDoc::bytesUsed() const {
+    size_t total = fAllocator->bytesUsed() + fContentLength;
+    total += fObjects.count() * sizeof(PublicObjectEntry);
+    total += fPages.count() * sizeof(SkPdfPageObjectDictionary*);
+    total += sizeof(*this);
+    return total;
+}