summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tempura San <tempura.san@gmail.com> 2018-03-17 21:20:06 +0100
committer Christopher Adams <chris.adams@jollamobile.com> 2019-02-27 01:53:04 +0000
commit 0b4522ae19d560881fdf85e8762c0d7f28310d50 (patch)
tree 1a1019fb7ecc57c339614203ba7fdafd53442b6a
parent 3f567865fbfd87b47b7f786d935d96f93cef9176 (diff)
Enable parsing of VCARDs with mixed line endings
VCARDs should use the CRLF line ending. Some parties may nonetheless deliver mixed line endings in a single VCARD. The QVersitReader should be able to parse these non-standard VCARDs as well, since the content is still valid.

Change-Id: Ia8ba810d96d59aeab2dd49a1c37989082cb54993
Reviewed-by: Tempura San <tempura.san@gmail.com>
Reviewed-by: André Hartmann <aha_1980@gmx.de>
-rw-r--r-- src/versit/qversitreader_p.cpp 173
-rw-r--r-- src/versit/qversitreader_p.h 1
-rw-r--r-- tests/auto/versit/qversitreader/tst_qversitreader.cpp 14
3 files changed, 109 insertions, 79 deletions
diff --git a/src/versit/qversitreader_p.cpp b/src/versit/qversitreader_p.cpp
index e13b0892e..ccf6b05a6 100644
--- a/src/versit/qversitreader_p.cpp
+++ b/src/versit/qversitreader_p.cpp
@@ -116,7 +116,6 @@ LineReader::LineReader(QIODevice* device, QTextCodec *codec)
} else {
mIsCodecCertain = true;
}
- mCrlfList = *VersitUtils::newlineList(mCodec);
}
/*!
@@ -129,7 +128,6 @@ LineReader::LineReader(QIODevice* device, QTextCodec *codec, int chunkSize)
mCodec(codec),
mIsCodecCertain(true),
mChunkSize(chunkSize),
- mCrlfList(*VersitUtils::newlineList(mCodec)),
mOdometer(0),
mSearchFrom(0)
{
@@ -199,6 +197,10 @@ LByteArray LineReader::readLine()
continuation of the next line)
*/
void LineReader::readOneLine(LByteArray* cursor) {
+ QByteArray cr(VersitUtils::encode('\r', mCodec));
+ QByteArray lf(VersitUtils::encode('\n', mCodec));
+ QByteArray crlf(VersitUtils::encode("\r\n", mCodec));
+
cursor->mStart = cursor->mEnd;
mSearchFrom = cursor->mStart;
@@ -212,15 +214,36 @@ void LineReader::readOneLine(LByteArray* cursor) {
QByteArray temp = mDevice->read(mChunkSize);
if (!temp.isEmpty()) {
cursor->mData.append(temp);
+
+ // Sanitise CRLF before proceeding to handle mixed line endings.
+ // Convert the other two possible newline representations to '\n'.
+ cursor->mData.replace(crlf, lf);
+ if (cursor->mData.endsWith(cr)) {
+ // Corner case: the buffer ends with \r, which could turn out to be the
+ // first half of a \r\n once the next chunk is read. Convert every other
+ // occurrence to \n, but restore the trailing \r for now.
+ cursor->mData.replace(cr, lf);
+ cursor->mData.truncate(cursor->mData.length() - lf.length());
+ cursor->mData.append(cr);
+ } else {
+ cursor->mData.replace(cr, lf);
+ }
+
if (tryReadLine(cursor, false))
return;
+
} else {
mDevice->waitForReadyRead(500);
}
}
// We've reached the end of the stream. Find a newline from the buffer (or return what's left).
+ // But first, strip the last occurrence of \r - if present - left from before.
+ if (cursor->mData.endsWith(cr))
+ cursor->mData.truncate(cursor->mData.length() - cr.length());
+
tryReadLine(cursor, true);
+
return;
}
@@ -282,101 +305,95 @@ void LineReader::setCodecUtf8Incompatible() {
* sequences of newline-space from the retrieved line. Skips over any newlines at the start of the
* input.
*
- * \a cursor is filled with a the line
+ * \a cursor is filled with the line to parse
* \a atEnd is true if we've reached the end of the stream
* Returns true if a line was completely read (ie. a newline character was found)
+ *
+ * Expects the caller to have already converted all newline sequences (\r\n, \r and \n) to \n
+ * (referred to as NL or 'newline' in the code).
*/
bool LineReader::tryReadLine(LByteArray *cursor, bool atEnd)
{
- int crlfPos = -1;
- int doubleCrLfCheck = -1;
- QByteArray space(VersitUtils::encode(' ', mCodec));
+ int nlPos = -1;
+ int doubleNlCheck = -1;
+
+ QByteArray nl(VersitUtils::encode('\n', mCodec));
QByteArray tab(VersitUtils::encode('\t', mCodec));
+ QByteArray space(VersitUtils::encode(' ', mCodec));
QByteArray equals(VersitUtils::encode('=', mCodec));
+ int nlLength = nl.length();
int spaceLength = space.length();
int equalsLength = equals.length();
forever {
- foreach(const QByteArrayMatcher& crlf, mCrlfList) {
- int crlfLength = crlf.pattern().length();
- crlfPos = crlf.indexIn(cursor->mData, mSearchFrom);
- doubleCrLfCheck = crlf.indexIn(cursor->mData, mSearchFrom + crlfLength);
- if ((crlfPos == cursor->mStart) && (doubleCrLfCheck != crlfPos + crlfLength)) {
- // Single Newline at start of line. Ignore and Set mStart to directly after it.
- cursor->mStart += crlfLength;
- mSearchFrom = cursor->mStart;
- break;
- } else if ((crlfPos == cursor->mStart) && (doubleCrLfCheck == crlfPos + crlfLength)) {
- // Found '=CrLfCrLf' - We choose to see this as badly formed,
- // but clear end of the versit property.
- cursor->mData.remove(crlfPos, crlfLength);
- cursor->mEnd = crlfPos;
- if (QVersitReaderPrivate::containsAt(cursor->mData, equals, crlfPos - equalsLength) ) {
- cursor->mData.remove(crlfPos -1, 1);
+ nlPos = cursor->mData.indexOf(nl, mSearchFrom);
+ doubleNlCheck = cursor->mData.indexOf(nl, mSearchFrom + nlLength);
+ if ((nlPos == cursor->mStart) && (doubleNlCheck != nlPos + nlLength)) {
+ // Single newline at start of line - ignore and set mStart to directly after it.
+ cursor->mStart += nlLength;
+ mSearchFrom = cursor->mStart;
+ continue;
+ } else if ((nlPos == cursor->mStart) && (doubleNlCheck == nlPos + nlLength)) {
+ // Found '=NLNL' - we choose to see this as badly formed,
+ // but as clearly marking the end of the versit property.
+ cursor->mData.remove(nlPos, nlLength);
+ cursor->mEnd = nlPos;
+ if (QVersitReaderPrivate::containsAt(cursor->mData, equals, nlPos - equalsLength) ) {
+ cursor->mData.remove(nlPos - 1, 1);
+ }
+ return true;
+ } else if (nlPos > cursor->mStart) {
+ // Found the first occurrence of newline in the current buffer.
+ if (QVersitReaderPrivate::containsAt(cursor->mData, space, nlPos + nlLength)
+ || QVersitReaderPrivate::containsAt(cursor->mData, tab, nlPos + nlLength)) {
+ // If it's followed by whitespace, collapse it.
+ cursor->mData.remove(nlPos, nlLength + spaceLength);
+ mSearchFrom = nlPos;
+ continue;
+ } else if (!atEnd && nlPos + nlLength + spaceLength >= cursor->mData.size()) {
+ // If our newline is at the end of the current buffer but there's more to read,
+ // it's possible that a space could be hiding on the next read from the device.
+ // Just pretend we didn't see the newline and pick it up the next time round.
+ mSearchFrom = nlPos;
+ return false;
+ } else {
+ // Found the newline.
+ // Hack: if malformed vCard files (having no NL or NLNL ending) are
+ // concatenated, we can get a malformed line in the document which looks like:
+ // END:VCARDBEGIN:VCARD
+ // In that situation, we should actually insert the newline sequence manually,
+ // and return mEnd after the END:VCARD + NL position.
+ QByteArray ev(VersitUtils::encode(QByteArray("END:VCARD"), mCodec));
+ QByteArray evbv(VersitUtils::encode(QByteArray("END:VCARDBEGIN:VCARD"), mCodec));
+ QByteArray evnl(VersitUtils::encode(QByteArray("END:VCARD\n"), mCodec));
+
+ QByteArray possiblyMalformedLine = cursor->mData.mid(cursor->mStart, nlPos-cursor->mStart);
+ int pmlEnd = possiblyMalformedLine.size() - 1;
+ while (true) {
+ if (QVersitReaderPrivate::containsAt(possiblyMalformedLine, nl, pmlEnd - nlLength))
+ possiblyMalformedLine.chop(nlLength);
+ else
+ break;
}
- return true;
- } else if (crlfPos > cursor->mStart) {
- // Found the first occurance of CRLF in the current buffer.
- if (QVersitReaderPrivate::containsAt(cursor->mData, space, crlfPos + crlfLength)
- || QVersitReaderPrivate::containsAt(cursor->mData, tab, crlfPos + crlfLength)) {
- // If it's followed by whitespace, collapse it.
- cursor->mData.remove(crlfPos, crlfLength + spaceLength);
- mSearchFrom = crlfPos;
- break;
- } else if (!atEnd && crlfPos + crlfLength + spaceLength >= cursor->mData.size()) {
- // If our CRLF is at the end of the current buffer but there's more to read,
- // it's possible that a space could be hiding on the next read from the device.
- // Just pretend we didn't see the CRLF and pick it up the next time round.
- mSearchFrom = crlfPos;
- return false;
+ if (possiblyMalformedLine == evbv) {
+ // fix up the malformed line, return the end cursor after it.
+ cursor->mData.replace(cursor->mStart, ev.length(), evnl);
+ cursor->mEnd = cursor->mStart + evnl.length();
} else {
- // Found the CRLF.
- // Hack: if malformed vCard files (having no \r\n or \r\n\r\n ending) are
- // concatenated, we can get a malformed line in the document which looks like:
- // END:VCARDBEGIN:VCARD
- // In that situation, we should actually insert the \r\n sequence manually,
- // and return mEnd after the END:VCARD\r\n position.
- QByteArray cr(VersitUtils::encode('\r', mCodec));
- QByteArray lf(VersitUtils::encode('\n', mCodec));
- QByteArray ev(VersitUtils::encode(QByteArray("END:VCARD"), mCodec));
- QByteArray evbv(VersitUtils::encode(QByteArray("END:VCARDBEGIN:VCARD"), mCodec));
- QByteArray evcrlf(VersitUtils::encode(QByteArray("END:VCARD\r\n"), mCodec));
- int crSz = cr.size();
- int lfSz = lf.size();
- int evSz = ev.size();
- int evcrlfSz = evcrlf.size();
-
- QByteArray possiblyMalformedLine = cursor->mData.mid(cursor->mStart, crlfPos-cursor->mStart);
- int pmlEnd = possiblyMalformedLine.size() - 1;
- while (true) {
- if (QVersitReaderPrivate::containsAt(possiblyMalformedLine, cr, pmlEnd - crSz)) {
- possiblyMalformedLine.chop(crSz);
- } else if (QVersitReaderPrivate::containsAt(possiblyMalformedLine, lf, pmlEnd - lfSz)) {
- possiblyMalformedLine.chop(lfSz);
- } else {
- break;
- }
- }
- if (possiblyMalformedLine == evbv) {
- // fix up the malformed line, return the end cursor after it.
- cursor->mData.replace(cursor->mStart, evSz, evcrlf);
- cursor->mEnd = cursor->mStart+evcrlfSz;
- return true;
- } else {
- // A well-formed line.
- cursor->mEnd = crlfPos;
- return true;
- }
+ // A well-formed line.
+ cursor->mEnd = nlPos;
}
+
+ return true;
}
}
- if (crlfPos == -1) {
- // No CRLF found.
+ if (nlPos == -1) {
+ // No newline found.
cursor->mEnd = cursor->mData.size();
// Next time, continue searching from here.
- // The largest CRLF will have a size of 8 bytes, so we should backtrack 8 bytes
- mSearchFrom = qMax(mSearchFrom, cursor->mEnd-8);
+ // The largest newline will have a size of 4 bytes, so we should backtrack 4 bytes
+ mSearchFrom = qMax(mSearchFrom, cursor->mEnd - 4);
return false;
}
}
diff --git a/src/versit/qversitreader_p.h b/src/versit/qversitreader_p.h
index 49133420d..1716295b4 100644
--- a/src/versit/qversitreader_p.h
+++ b/src/versit/qversitreader_p.h
@@ -184,7 +184,6 @@ private:
bool mIsCodecCertain;
bool mIsCodecUtf8Compatible;
int mChunkSize; // How many bytes to read in one go.
- QList<QByteArrayMatcher> mCrlfList;
QStack<QByteArray> mPushedLines; // Stores a lines that has been "pushed" in front by pushLine
LByteArray mBuffer;
int mOdometer;
diff --git a/tests/auto/versit/qversitreader/tst_qversitreader.cpp b/tests/auto/versit/qversitreader/tst_qversitreader.cpp
index 22a1b8306..6a73c4dda 100644
--- a/tests/auto/versit/qversitreader/tst_qversitreader.cpp
+++ b/tests/auto/versit/qversitreader/tst_qversitreader.cpp
@@ -381,6 +381,20 @@ void tst_QVersitReader::testReading()
QCOMPARE(mReader->error(), QVersitReader::NoError);
QCOMPARE(results.count(),2);
+ // Exception case for properties not ending in CrLf, but in Cr or Lf
+ const QByteArray& lfDocument =
+ "BEGIN:VCARD\r\nVERSION:2.1\r\nFN:John\nORG:ACME\rEND:VCARD\r\n";
+ mInputDevice->close();
+ mInputDevice->setData(lfDocument);
+ mInputDevice->open(QBuffer::ReadOnly);
+ mInputDevice->seek(0);
+ QVERIFY2(mReader->startReading(), QString::number(mReader->error()).toLatin1().data());
+ QVERIFY2(mReader->waitForFinished(), QString::number(mReader->error()).toLatin1().data());
+ results = mReader->results();
+ QCOMPARE(mReader->state(), QVersitReader::FinishedState);
+ QCOMPARE(mReader->error(), QVersitReader::NoError);
+ QCOMPARE(results.count(), 1);
+
// Exception case for a property ending in =CrLfCrLf, ie "=\r\n\r\n"
const QByteArray& myTest =
"BEGIN:VCARD\r\nVERSION:4.0\r\nFN:John\r\n"