diff options
Diffstat (limited to 'chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp')
-rw-r--r-- | chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp | 582 |
1 files changed, 582 insertions, 0 deletions
diff --git a/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp new file mode 100644 index 00000000000..2f0c18cb60a --- /dev/null +++ b/chromium/third_party/skia/experimental/PdfViewer/pdfparser/native/SkPdfNativeDoc.cpp @@ -0,0 +1,582 @@ +/* + * Copyright 2013 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkPdfNativeDoc.h" + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "SkPdfMapper_autogen.h" +#include "SkPdfNativeObject.h" +#include "SkPdfNativeTokenizer.h" +#include "SkPdfReporter.h" +#include "SkStream.h" + +// TODO(edisonn): for some reason on mac these files are found here, but are found from headers +//#include "SkPdfFileTrailerDictionary_autogen.h" +//#include "SkPdfCatalogDictionary_autogen.h" +//#include "SkPdfPageObjectDictionary_autogen.h" +//#include "SkPdfPageTreeNodeDictionary_autogen.h" +#include "SkPdfHeaders_autogen.h" + +static long getFileSize(const char* filename) +{ + struct stat stat_buf; + int rc = stat(filename, &stat_buf); + return rc == 0 ? (long)stat_buf.st_size : -1; +} + +static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) { + while (current > start && !isPdfEOL(*(current - 1))) { + current--; + } + return current; +} + +static const unsigned char* previousLineHome(const unsigned char* start, + const unsigned char* current) { + if (current > start && isPdfEOL(*(current - 1))) { + current--; + } + + // allows CR+LF, LF+CR but not two CR+CR or LF+LF + if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) { + current--; + } + + while (current > start && !isPdfEOL(*(current - 1))) { + current--; + } + + return current; +} + +static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) { + while (current < end && !isPdfEOL(*current)) { + current++; + } + current++; + if (current < end && isPdfEOL(*current) && *current != *(current - 1)) { + current++; + } + return current; +} + +SkPdfNativeDoc* gDoc = NULL; + +SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream) + : fAllocator(new SkPdfAllocator()) + , fFileContent(NULL) + , fContentLength(0) + , fRootCatalogRef(NULL) + , fRootCatalog(NULL) { + size_t size = stream->getLength(); + void* ptr = sk_malloc_throw(size); + stream->read(ptr, size); + + init(ptr, size); +} + +SkPdfNativeDoc::SkPdfNativeDoc(const char* path) + : fAllocator(new SkPdfAllocator()) + , fFileContent(NULL) + , fContentLength(0) + , fRootCatalogRef(NULL) + , fRootCatalog(NULL) { + gDoc = this; + FILE* file = fopen(path, "r"); + // TODO(edisonn): put this in a function that can return NULL + if (file) { + size_t size = getFileSize(path); + void* content = sk_malloc_throw(size); + bool ok = (0 != fread(content, size, 1, file)); + fclose(file); + if (!ok) { + sk_free(content); + SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "could not read file", NULL, NULL); + // TODO(edisonn): not nice to return like this from constructor, create a static + // function that can report NULL for failures. + return; // Doc will have 0 pages + } + + init(content, size); + } +} + +void SkPdfNativeDoc::init(const void* bytes, size_t length) { + fFileContent = (const unsigned char*)bytes; + fContentLength = length; + const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1); + const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine); + const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine); + + if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { + SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, + "Could not find startxref", NULL, NULL); + } + + long xrefByteOffset = atol((const char*)xrefByteOffsetLine); + + bool storeCatalog = true; + while (xrefByteOffset >= 0) { + const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset, + xrefstartKeywordLine); + xrefByteOffset = -1; + if (trailerStart < xrefstartKeywordLine) { + this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false); + storeCatalog = false; + } + } + + // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration + // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper + + if (fRootCatalogRef) { + fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); + if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) { + SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); + if (tree && tree->isDictionary() && tree->valid()) { + fillPages(tree); + } + } + } + + if (pages() == 0) { + // TODO(edisonn): probably it would be better to return NULL and make a clean document. + loadWithoutXRef(); + } + + // TODO(edisonn): corrupted pdf, read it from beginning and rebuild + // (xref, trailer, or just read all objects) +} + +void SkPdfNativeDoc::loadWithoutXRef() { + const unsigned char* current = fFileContent; + const unsigned char* end = fFileContent + fContentLength; + + // TODO(edisonn): read pdf version + current = ignoreLine(current, end); + + current = skipPdfWhiteSpaces(current, end); + while (current < end) { + SkPdfNativeObject token; + current = nextObject(current, end, &token, NULL, NULL); + if (token.isInteger()) { + int id = (int)token.intValue(); + + token.reset(); + current = nextObject(current, end, &token, NULL, NULL); + // TODO(edisonn): generation ignored for now (used in pdfs with updates) + // int generation = (int)token.intValue(); + + token.reset(); + current = nextObject(current, end, &token, NULL, NULL); + // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring. + if (!token.isKeyword("obj")) { + SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, + "Could not find obj", NULL, NULL); + continue; + } + + while (fObjects.count() < id + 1) { + reset(fObjects.append()); + } + + fObjects[id].fOffset = current - fFileContent; + + SkPdfNativeObject* obj = fAllocator->allocObject(); + current = nextObject(current, end, obj, fAllocator, this); + + fObjects[id].fResolvedReference = obj; + fObjects[id].fObj = obj; + fObjects[id].fIsReferenceResolved = true; + } else if (token.isKeyword("trailer")) { + long dummy; + current = readTrailer(current, end, true, &dummy, true); + } else if (token.isKeyword("startxref")) { + token.reset(); + current = nextObject(current, end, &token, NULL, NULL); // ignore startxref + } + + current = skipPdfWhiteSpaces(current, end); + } + + // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we + // might not need it. + if (!fRootCatalogRef) { + for (unsigned int i = 0 ; i < objects(); i++) { + SkPdfNativeObject* obj = object(i); + SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL; + if (root && root->isReference()) { + fRootCatalogRef = root; + } + } + } + + if (fRootCatalogRef) { + fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); + if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) { + SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); + if (tree && tree->isDictionary() && tree->valid()) { + fillPages(tree); + } + } + } + + +} + +SkPdfNativeDoc::~SkPdfNativeDoc() { + sk_free((void*)fFileContent); + delete fAllocator; +} + +const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, + const unsigned char* trailerEnd) { + SkPdfNativeObject xref; + const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL); + + if (!xref.isKeyword("xref")) { + SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref", + NULL, NULL); + return trailerEnd; + } + + SkPdfNativeObject token; + while (current < trailerEnd) { + token.reset(); + const unsigned char* previous = current; + current = nextObject(current, trailerEnd, &token, NULL, NULL); + if (!token.isInteger()) { + SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, + "Done readCrossReferenceSection", NULL, NULL); + return previous; + } + + int startId = (int)token.intValue(); + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL, NULL); + + if (!token.isInteger()) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", + &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + return current; + } + + int entries = (int)token.intValue(); + + for (int i = 0; i < entries; i++) { + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL, NULL); + if (!token.isInteger()) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readCrossReferenceSection", + &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + return current; + } + int offset = (int)token.intValue(); + + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL, NULL); + if (!token.isInteger()) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readCrossReferenceSection", + &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + return current; + } + int generation = (int)token.intValue(); + + token.reset(); + current = nextObject(current, trailerEnd, &token, NULL, NULL); + if (!token.isKeyword() || token.lenstr() != 1 || + (*token.c_str() != 'f' && *token.c_str() != 'n')) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readCrossReferenceSection: f or n expected", + &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); + return current; + } + + this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f'); + } + } + SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, + "Unexpected end of readCrossReferenceSection", NULL, NULL); + return current; +} + +const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, + const unsigned char* trailerEnd, + bool storeCatalog, long* prev, bool skipKeyword) { + *prev = -1; + + const unsigned char* current = trailerStart; + if (!skipKeyword) { + SkPdfNativeObject trailerKeyword; + // Use null allocator, and let it just fail if memory, it should not crash. + current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL); + + if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || + strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readTrailer: trailer keyword expected", + &trailerKeyword, + SkPdfNativeObject::kKeyword_PdfObjectType, NULL); + return current; + } + } + + SkPdfNativeObject token; + current = nextObject(current, trailerEnd, &token, fAllocator, NULL); + if (!token.isDictionary()) { + return current; + } + SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; + if (!trailer->valid()) { + return current; + } + + if (storeCatalog) { + SkPdfNativeObject* ref = trailer->Root(NULL); + if (ref == NULL || !ref->isReference()) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readTrailer: unexpected root reference", + ref, SkPdfNativeObject::kReference_PdfObjectType, NULL); + return current; + } + fRootCatalogRef = ref; + } + + if (trailer->has_Prev()) { + *prev = (long)trailer->Prev(NULL); + } + + return current; +} + +void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { + // TODO(edisonn): security here, verify id + while (fObjects.count() < id + 1) { + this->reset(fObjects.append()); + } + + fObjects[id].fOffset = offset; + fObjects[id].fObj = NULL; + fObjects[id].fResolvedReference = NULL; + fObjects[id].fIsReferenceResolved = false; +} + +SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) { + long startOffset = fObjects[id].fOffset; + //long endOffset = fObjects[id].fOffsetEnd; + // TODO(edisonn): use hinted endOffset + const unsigned char* current = fFileContent + startOffset; + const unsigned char* end = fFileContent + fContentLength; + + SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this); + + SkPdfNativeObject idObj; + SkPdfNativeObject generationObj; + SkPdfNativeObject objKeyword; + SkPdfNativeObject* dict = fAllocator->allocObject(); + + current = nextObject(current, end, &idObj, NULL, NULL); + if (current >= end) { + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id", + NULL, NULL); + return NULL; + } + + current = nextObject(current, end, &generationObj, NULL, NULL); + if (current >= end) { + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "reading generation", NULL, NULL); + return NULL; + } + + current = nextObject(current, end, &objKeyword, NULL, NULL); + if (current >= end) { + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "reading keyword obj", NULL, NULL); + return NULL; + } + + if (!idObj.isInteger() || id != idObj.intValue()) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id", + &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + } + + // TODO(edisonn): verify that the generation is the right one + if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readObject: unexpected generation", + &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); + } + + if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) { + SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, + "readObject: unexpected obj keyword", + &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); + } + + current = nextObject(current, end, dict, fAllocator, this); + + // TODO(edisonn): report warning/error - verify that the last token is endobj + + return dict; +} + +void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) { + SkPdfArray* kids = tree->Kids(this); + if (kids == NULL) { + *fPages.append() = (SkPdfPageObjectDictionary*)tree; + return; + } + + int cnt = (int) kids->size(); + for (int i = 0; i < cnt; i++) { + SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i)); + if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) { + *fPages.append() = (SkPdfPageObjectDictionary*)obj; + } else { + // TODO(edisonn): verify that it is a page tree indeed + fillPages((SkPdfPageTreeNodeDictionary*)obj); + } + } +} + +int SkPdfNativeDoc::pages() const { + return fPages.count(); +} + +SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) { + SkASSERT(page >= 0 && page < fPages.count()); + return fPages[page]; +} + + +SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) { + SkASSERT(page >= 0 && page < fPages.count()); + return fPages[page]->Resources(this); +} + +// TODO(edisonn): Partial implemented. +// Move the logics directly in the code generator for inheritable and default values? +SkRect SkPdfNativeDoc::MediaBox(int page) { + SkPdfPageObjectDictionary* current = fPages[page]; + while (!current->has_MediaBox() && current->has_Parent()) { + current = (SkPdfPageObjectDictionary*)current->Parent(this); + } + if (current) { + return current->MediaBox(this); + } + return SkRect::MakeEmpty(); +} + +size_t SkPdfNativeDoc::objects() const { + return fObjects.count(); +} + +SkPdfNativeObject* SkPdfNativeDoc::object(int i) { + SkASSERT(!(i < 0 || i > fObjects.count())); + + if (i < 0 || i > fObjects.count()) { + return NULL; + } + + if (fObjects[i].fObj == NULL) { + fObjects[i].fObj = readObject(i); + // TODO(edisonn): For perf, when we read the cross reference sections, we should take + // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad + // stream, and fail quickly, in case we default to sequential stream read. + } + + return fObjects[i].fObj; +} + +const SkPdfMapper* SkPdfNativeDoc::mapper() const { + return fMapper; +} + +SkPdfReal* SkPdfNativeDoc::createReal(double value) const { + SkPdfNativeObject* obj = fAllocator->allocObject(); + SkPdfNativeObject::makeReal(value, obj); + TRACK_OBJECT_SRC(obj); + return (SkPdfReal*)obj; +} + +SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const { + SkPdfNativeObject* obj = fAllocator->allocObject(); + SkPdfNativeObject::makeInteger(value, obj); + TRACK_OBJECT_SRC(obj); + return (SkPdfInteger*)obj; +} + +SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const { + SkPdfNativeObject* obj = fAllocator->allocObject(); + SkPdfNativeObject::makeString(sz, len, obj); + TRACK_OBJECT_SRC(obj); + return (SkPdfString*)obj; +} + +SkPdfAllocator* SkPdfNativeDoc::allocator() const { + return fAllocator; +} + +SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) { + if (ref && ref->isReference()) { + int id = ref->referenceId(); + // TODO(edisonn): generation/updates not supported now + //int gen = ref->referenceGeneration(); + + // TODO(edisonn): verify id and gen expected + if (id < 0 || id >= fObjects.count()) { + SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, + "resolve reference id out of bounds", NULL, NULL); + return NULL; + } + + if (fObjects[id].fIsReferenceResolved) { + SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity, + kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL); + return fObjects[id].fResolvedReference; + } + + // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow + // here unless we check for resolved reference on next line. + // Determine if the pdf is corrupted, or we have a bug here. + + // Avoids recursive calls + fObjects[id].fIsReferenceResolved = true; + + if (fObjects[id].fObj == NULL) { + fObjects[id].fObj = readObject(id); + } + + if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) { + if (!fObjects[id].fObj->isReference()) { + fObjects[id].fResolvedReference = fObjects[id].fObj; + } else { + fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj); + } + } + + return fObjects[id].fResolvedReference; + } + + return (SkPdfNativeObject*)ref; +} + +size_t SkPdfNativeDoc::bytesUsed() const { + return fAllocator->bytesUsed() + + fContentLength + + fObjects.count() * sizeof(PublicObjectEntry) + + fPages.count() * sizeof(SkPdfPageObjectDictionary*) + + sizeof(*this); +} |