1 files changed, 702 insertions, 0 deletions
diff --git a/chromium/third_party/libwebm/source/webvttparser.cc b/chromium/third_party/libwebm/source/webvttparser.cc
new file mode 100644
index 00000000000..655252c35f9
--- /dev/null
+++ b/chromium/third_party/libwebm/source/webvttparser.cc
@@ -0,0 +1,702 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include "./webvttparser.h"  // NOLINT
+#include <climits>
+
+namespace libwebvtt {
+
+// Character codes with special meaning to the parser.  NOLINT'ed because
+// clang-format would otherwise collapse the enum onto one unreadable line.
+enum {
+  kNUL = '\x00',   // NUL
+  kSPACE = ' ',    // space
+  kTAB = '\x09',   // horizontal tab
+  kLF = '\x0A',    // line feed
+  kCR = '\x0D'     // carriage return
+};  // NOLINT
+
+Reader::~Reader() {}  // empty: nothing is released here
+
+LineReader::~LineReader() {}  // empty: nothing is released here
+
+// Reads the next line of text from the stream into |*line_ptr|.  The line
+// terminator is consumed but never stored.  The WebVTT spec allows a line
+// to be terminated by LF, by CR, or by CR LF.
+//
+// Returns:
+//    0  a line was read (it may be empty)
+//    1  end-of-stream was reached before any character was read
+//   <0  error: NULL argument, a 0xFE or 0xFF byte, a line longer than
+//       kMaxLineLength characters, or an error reported by GetChar
+int LineReader::GetLine(std::string* line_ptr) {
+  // To defend against pathological or malicious streams, the length of
+  // a line is capped at some arbitrarily-large value.
+  enum { kMaxLineLength = 10000 };
+
+  if (line_ptr == NULL)
+    return -1;
+
+  std::string& text = *line_ptr;
+  text.clear();
+
+  // Pull characters off the stream one at a time.  Ordinary characters
+  // are appended to the line; the loop is left only once a CR has been
+  // seen, since every other outcome returns straight away.
+  bool saw_cr = false;
+
+  while (!saw_cr) {
+    char ch;
+    const int status = GetChar(&ch);
+
+    if (status < 0)  // error
+      return status;
+
+    if (status > 0)  // EOF: reported as such only if nothing was read
+      return text.empty() ? 1 : 0;
+
+    switch (ch) {
+      case kLF:
+        // The easy end-of-line case: nothing further to consume.
+        return 0;
+
+      case kCR:
+        // The hard end-of-line case: a LF may still follow.
+        saw_cr = true;
+        break;
+
+      case '\xFE':
+      case '\xFF':
+        // Not UTF-8.
+        return -1;
+
+      default:
+        // An ordinary character: accumulate it, unless the line is full.
+        if (text.length() >= kMaxLineLength)
+          return -1;
+
+        text.push_back(ch);
+        break;
+    }
+  }
+
+  // A CR was consumed.  Look at the character that follows it: a LF is
+  // part of the same terminator, while anything else is the first
+  // character of the next line and must go back to the stream.
+  char next;
+  const int status = GetChar(&next);
+
+  if (status < 0)  // error
+    return status;
+
+  if (status == 0 && next != kLF)
+    UngetChar(next);
+
+  // Reaching EOF right after the CR still yields a complete line.
+  return 0;
+}
+
+Parser::Parser(Reader* r) : reader_(r), unget_(-1) {}  // -1: no pushback
+
+Parser::~Parser() {}  // empty: nothing is released here
+
+// Consumes and validates the start of a WebVTT stream: an optional
+// UTF-8 BOM, the "WEBVTT" signature, the remainder of the signature
+// line, and the empty line that separates it from the first cue.
+//
+// Returns 0 on success and a negative value on failure (read error,
+// unexpected end-of-stream, or a stream that is not WebVTT).  A stream
+// that ends immediately after the signature, or immediately after the
+// signature line, is accepted.
+int Parser::Init() {
+  int status = ParseBOM();
+
+  if (status < 0)  // error
+    return status;
+
+  if (status > 0)  // EOF
+    return -1;
+
+  // Match the signature one character at a time rather than reading
+  // a whole line, in order to defend against non-WebVTT streams (e.g.
+  // binary files) that don't happen to comprise lines of text demarcated
+  // with line terminators.
+  static const char kSignature[] = "WEBVTT";
+  const int kSignatureLength = static_cast<int>(sizeof(kSignature)) - 1;
+
+  for (int i = 0; i < kSignatureLength; ++i) {
+    char ch;
+    status = GetChar(&ch);
+
+    if (status < 0)  // error
+      return status;
+
+    if (status > 0)  // EOF
+      return -1;
+
+    if (ch != kSignature[i])  // not a WebVTT stream
+      return -1;
+  }
+
+  // Anything that follows "WEBVTT" on the same line is optional, but it
+  // has to begin with a space or a tab.
+  std::string line;
+  status = GetLine(&line);
+
+  if (status < 0)  // error
+    return status;
+
+  if (status > 0)  // EOF
+    return 0;  // weird but valid
+
+  if (!line.empty() && line[0] != kSPACE && line[0] != kTAB)
+    return -1;
+
+  // The WebVTT spec requires that the "WEBVTT" line be followed by an
+  // empty line (to separate it from the first cue).
+  status = GetLine(&line);
+
+  if (status < 0)  // error
+    return status;
+
+  if (status > 0)  // EOF
+    return 0;  // weird but we allow it
+
+  // The separator line must really be empty.
+  return line.empty() ? 0 : -1;
+}
+
+// Parses the next cue from the stream into |*cue|.
+//
+// A cue comprises an optional identifier line, a mandatory timings line
+// (the line that contains the "-->" arrow), and one or more payload
+// lines.  The payload ends at the first empty line.
+//
+// Once a cue has started, the identifier of |*cue| is overwritten (it is
+// left empty when the cue has none); the times, settings and payload
+// follow as parsing proceeds.
+//
+// Returns:
+//    0  a cue was parsed
+//   >0  end-of-stream was reached before the start of a cue
+//   <0  error (NULL argument, read error, or malformed cue)
+int Parser::Parse(Cue* cue) {
+  if (cue == NULL)
+    return -1;
+
+  std::string line;
+  int status;
+
+  // Skip blank lines until the first line of the cue shows up.  Any
+  // non-zero status is handed straight back to the caller: it is either
+  // an error, or EOF (which is OK here).
+  do {
+    status = GetLine(&line);
+
+    if (status != 0)
+      return status;
+  } while (line.empty());
+
+  // We now have either the cue identifier or the timings line.  The two
+  // are told apart by scanning for the arrow token, the lexeme of which
+  // may not appear in a cue identifier line.
+  static const char kArrow[] = "-->";
+  std::string::size_type arrow_pos = line.find(kArrow);
+
+  if (arrow_pos == std::string::npos) {
+    // No arrow, so we assume that this is the cue identifier line, and
+    // try again to find the cue timings on the next line.
+    cue->identifier.swap(line);
+
+    status = GetLine(&line);
+
+    if (status < 0)  // error
+      return status;
+
+    if (status > 0)  // EOF
+      return -1;
+
+    arrow_pos = line.find(kArrow);
+
+    if (arrow_pos == std::string::npos)  // not a timings line
+      return -1;
+  } else {
+    // This already is the timings line, which implies that the cue has
+    // no identifier.
+    cue->identifier.clear();
+  }
+
+  status = ParseTimingsLine(&line, arrow_pos, &cue->start_time,
+                            &cue->stop_time, &cue->settings);
+
+  if (status != 0)  // error
+    return status;
+
+  // The cue payload comprises all the non-empty lines that follow the
+  // timings line: keep collecting until an empty line comes back.  That
+  // also covers end-of-stream, which GetLine (as defined in this file)
+  // only reports together with an empty line.
+  Cue::payload_t& payload = cue->payload;
+  payload.clear();
+
+  while ((status = GetLine(&line)) >= 0 && !line.empty())
+    payload.push_back(line);
+
+  if (status < 0)  // error
+    return status;
+
+  // A cue without a payload is rejected.
+  return payload.empty() ? -1 : 0;
+}
+
+// Hands back the pushed-back character, if any, else reads from the reader.
+int Parser::GetChar(char* c) {
+  if (unget_ < 0)  // pushback slot is empty
+    return reader_->GetChar(c);
+
+  *c = static_cast<char>(unget_);
+  unget_ = -1;  // the slot is empty again
+  return 0;
+}
+
+void Parser::UngetChar(char c) { unget_ = static_cast<unsigned char>(c); }  // non-negative
+
+// Consumes an optional UTF-8 byte order mark.  Returns 0 on success (with
+// or without a BOM), 1 on EOF, and a negative value on error; a BOM that
+// is started but not finished is an error.
+int Parser::ParseBOM() {
+  static const char kUtf8Bom[] = "\xEF\xBB\xBF";
+  const int kBomLength = static_cast<int>(sizeof(kUtf8Bom)) - 1;
+
+  for (int pos = 0; pos < kBomLength; ++pos) {
+    char ch;
+    const int status = GetChar(&ch);
+
+    if (status < 0)  // error
+      return status;
+
+    if (status > 0)  // EOF
+      return 1;
+
+    if (ch == kUtf8Bom[pos])
+      continue;  // still matching the BOM
+
+    if (pos > 0)  // we started a BOM, so we must finish it
+      return -1;
+
+    UngetChar(ch);  // no BOM at all: give the character back
+    return 0;
+  }
+
+  return 0;  // the whole BOM was consumed
+}
+
+// Parses a cue timings line of the form
+//   START --> STOP [NAME:VALUE ...]
+// where |arrow_pos| is the offset of the "-->" token within |*line_ptr|.
+//
+// The line is modified in place: the first character of the arrow is
+// overwritten with NUL and a NUL is appended, so that the scanners have
+// a place to stop.
+//
+// Returns 0 on success; otherwise -1, or the error of the failing step.
+int Parser::ParseTimingsLine(std::string* line_ptr,
+                             std::string::size_type arrow_pos, Time* start_time,
+                             Time* stop_time, Cue::settings_t* settings) {
+  if (line_ptr == NULL)
+    return -1;
+
+  std::string& text = *line_ptr;
+
+  if (arrow_pos == std::string::npos || arrow_pos >= text.length())
+    return -1;
+
+  // Cut the line at the arrow, so that the start time is NUL-delimited.
+  text[arrow_pos] = kNUL;
+
+  std::string::size_type pos = 0;
+  int status = ParseTime(text, &pos, start_time);
+
+  if (status != 0)  // error
+    return status;
+
+  // Only whitespace may sit between the start time and the arrow.
+  for (; text[pos] != kNUL; ++pos) {
+    if (text[pos] != kSPACE && text[pos] != kTAB)
+      return -1;
+  }
+
+  // Terminate the whole line too, then resume just beyond the arrow token.
+  text.push_back(kNUL);
+  pos = arrow_pos + 3;
+  status = ParseTime(text, &pos, stop_time);
+
+  if (status != 0)  // error
+    return status;
+
+  // Whatever remains on the line is the (possibly empty) settings list.
+  return ParseSettings(text, pos, settings);
+}
+
+// Parses a WebVTT timestamp from |line|, starting at |*idx_ptr|, into
+// |*time|.  Leading spaces and tabs are skipped.  On success |*idx_ptr|
+// is left on the first character after the timestamp.
+//
+// WebVTT timestamp syntax comes in three flavors:
+//   SS[.sss]
+//   MM:SS[.sss]
+//   HH:MM:SS[.sss]
+//
+// The fraction holds one to three digits and is a decimal fraction of a
+// second, so ".5" is 500 ms whereas ".050" is 50 ms.
+//
+// Returns 0 on success and a negative value on error.  On error |*time|
+// may have been partially written.
+int Parser::ParseTime(const std::string& line, std::string::size_type* idx_ptr,
+                      Time* time) {
+  if (idx_ptr == NULL)
+    return -1;
+
+  std::string::size_type& idx = *idx_ptr;
+
+  if (idx == std::string::npos || idx >= line.length())
+    return -1;
+
+  if (time == NULL)
+    return -1;
+
+  // Consume any whitespace that precedes the timestamp.
+  while (char c = line[idx]) {
+    if (c != kSPACE && c != kTAB)
+      break;
+    ++idx;
+  }
+
+  // Parse a generic number value.  We don't know which component
+  // of the time we have yet, until we do more parsing.
+  int val = ParseNumber(line, &idx);
+
+  if (val < 0)  // error
+    return val;
+
+  Time& t = *time;
+
+  // The presence of a colon character indicates that we have
+  // an [HH:]MM:SS style syntax.
+  if (line[idx] == ':') {
+    // We have either HH:MM:SS or MM:SS
+
+    // The value we just parsed is either the hours or minutes.
+    // It must be followed by another number value (that is
+    // either minutes or seconds).
+    const int first_val = val;
+
+    ++idx;  // consume colon
+
+    // Parse second value
+    val = ParseNumber(line, &idx);
+
+    if (val < 0)  // error
+      return val;
+
+    if (val >= 60)  // either MM or SS
+      return -1;
+
+    if (line[idx] == ':') {
+      // We have HH:MM:SS
+
+      t.hours = first_val;
+      t.minutes = val;  // vetted above
+
+      ++idx;  // consume MM:SS colon
+
+      // We have parsed the hours and minutes.
+      // We must now parse the seconds.
+      val = ParseNumber(line, &idx);
+
+      if (val < 0)  // error
+        return val;
+
+      if (val >= 60)  // SS part of HH:MM:SS
+        return -1;
+
+      t.seconds = val;
+    } else {
+      // We have MM:SS
+
+      // The implication here is that the hour value was omitted
+      // from the timestamp (because it was 0).
+      if (first_val >= 60)  // minutes
+        return -1;
+
+      t.hours = 0;
+      t.minutes = first_val;
+      t.seconds = val;  // vetted above
+    }
+  } else {
+    // We have SS (only)
+
+    // The time is expressed as total number of seconds,
+    // so the seconds value has no upper bound.
+    t.seconds = val;
+
+    // Convert SS to HH:MM:SS
+    t.minutes = t.seconds / 60;
+    t.seconds -= t.minutes * 60;
+
+    t.hours = t.minutes / 60;
+    t.minutes -= t.hours * 60;
+  }
+
+  // We have parsed the hours, minutes, and seconds.
+  // We must now parse the milliseconds.
+
+  // TODO(matthewjheaney): one option here is to slightly relax the
+  // syntax rules for WebVTT timestamps, to permit the comma character
+  // to also be used as the seconds/milliseconds separator.  This
+  // would handle streams that use localization conventions for
+  // countries in Western Europe.  For now we obey the rules specified
+  // in the WebVTT spec (allow "full stop" only).
+
+  if (line[idx] != '.') {
+    t.milliseconds = 0;
+  } else {
+    ++idx;  // consume FULL STOP
+
+    const std::string::size_type first_digit = idx;
+
+    val = ParseNumber(line, &idx);
+
+    if (val < 0)  // error
+      return val;
+
+    // The digits are a decimal fraction of a second, so the number of
+    // digits (and not just their value) determines the scale: "5" means
+    // 500 ms, "05" means 50 ms, and "005" means 5 ms.  Leading zeros are
+    // invisible in |val|, hence the explicit digit count.
+    const std::string::size_type digit_count = idx - first_digit;
+
+    if (digit_count > 3)  // finer than a millisecond
+      return -1;
+
+    t.milliseconds = val;
+    for (std::string::size_type n = digit_count; n < 3; ++n)
+      t.milliseconds *= 10;
+  }
+
+  // We have parsed the time proper.  We must check for any
+  // junk that immediately follows the time specifier.
+  const char c = line[idx];
+
+  if (c != kNUL && c != kSPACE && c != kTAB)
+    return -1;
+
+  return 0;  // success
+}
+
+// Parses the cue settings that follow the stop time on a timings line.
+// Starting at offset |idx|, |line| must hold zero or more NAME:VALUE pairs
+// separated by spaces or tabs, terminated by a NUL char (end-of-line).
+// |*settings| is cleared, then receives one Setting per pair.
+//
+// Returns 0 on success and -1 on error (NULL |settings|, |idx| outside the
+// line, an empty NAME or VALUE, a missing colon, or a colon inside VALUE).
+int Parser::ParseSettings(const std::string& line, std::string::size_type idx,
+                          Cue::settings_t* settings) {
+  if (settings == NULL)  // as the sibling parsers do for their pointers
+    return -1;
+
+  settings->clear();
+
+  if (idx == std::string::npos || idx >= line.length())
+    return -1;
+
+  for (;;) {
+    // Skip the whitespace in front of the next pair.  Reaching the NUL
+    // terminator here means the line itself has been fully consumed.
+    for (;;) {
+      const char c = line[idx];
+      if (c == kNUL)  // end-of-line
+        return 0;  // success
+
+      if (c != kSPACE && c != kTAB)
+        break;
+
+      ++idx;  // consume whitespace
+    }
+
+    // We have consumed the whitespace, and have not yet reached
+    // end-of-line, so there is something on the line for us to parse.
+    settings->push_back(Setting());
+    Setting& s = settings->back();
+
+    // Parse the NAME part of the settings pair.
+    for (;;) {
+      const char c = line[idx];
+      if (c == ':')  // we have reached end of NAME part
+        break;
+
+      if (c == kNUL || c == kSPACE || c == kTAB)  // pair lacks a colon
+        return -1;
+
+      s.name.push_back(c);
+      ++idx;
+    }
+
+    if (s.name.empty())
+      return -1;
+
+    ++idx;  // consume colon
+
+    // Parse the VALUE part of the settings pair.
+    for (;;) {
+      const char c = line[idx];
+      if (c == kNUL || c == kSPACE || c == kTAB)
+        break;
+
+      if (c == ':')  // suspicious when part of VALUE
+        return -1;  // TODO(matthewjheaney): verify this behavior
+
+      s.value.push_back(c);
+      ++idx;
+    }
+
+    if (s.value.empty())
+      return -1;
+  }
+}
+
+// Parses the run of decimal digits in |line| that starts at |*idx_ptr| and
+// returns its value; on success |*idx_ptr| is left just past the digits.
+// Returns -1 on error: bad arguments, no digit to parse, or overflow.
+int Parser::ParseNumber(const std::string& line,
+                        std::string::size_type* idx_ptr) {
+  if (idx_ptr == NULL)
+    return -1;
+
+  std::string::size_type& idx = *idx_ptr;
+
+  if (idx == std::string::npos || idx >= line.length())
+    return -1;
+
+  // Compare against '0'..'9' instead of calling isdigit(): the line holds
+  // raw bytes, and a negative char passed to isdigit() is undefined.
+  if (line[idx] < '0' || line[idx] > '9')
+    return -1;
+
+  int result = 0;
+
+  for (; line[idx] >= '0' && line[idx] <= '9'; ++idx) {
+    const int digit = line[idx] - '0';
+    if (result > INT_MAX / 10)  // the multiplication would overflow
+      return -1;
+
+    result *= 10;
+
+    if (result > INT_MAX - digit)  // the addition would overflow
+      return -1;
+    result += digit;
+  }
+
+  return result;
+}
+
+// Field-wise equality.
+//
+// Two times are equal only when their hours, minutes, seconds and
+// milliseconds all match.  Nothing is normalized first, so (for
+// instance) 60 seconds and 1 minute compare unequal.
+bool Time::operator==(const Time& rhs) const {
+  // Evaluation stops at the first field that differs.
+  return hours == rhs.hours &&
+         minutes == rhs.minutes &&
+         seconds == rhs.seconds &&
+         milliseconds == rhs.milliseconds;
+}
+
+// Strict ordering of two times.
+//
+// Returns true when this time sorts before |rhs|.  The fields are
+// compared lexicographically, from the most significant (hours) down to
+// the least significant (milliseconds): the first field that differs
+// decides the outcome.  The fields are taken as they are; nothing is
+// normalized first.
+bool Time::operator<(const Time& rhs) const {
+  if (hours != rhs.hours)
+    return hours < rhs.hours;
+
+  if (minutes != rhs.minutes)
+    return minutes < rhs.minutes;
+
+  if (seconds != rhs.seconds)
+    return seconds < rhs.seconds;
+
+  // Hours, minutes and seconds are all equal, so the milliseconds
+  // settle it; equal times yield false.
+  return milliseconds < rhs.milliseconds;
+}
+
+bool Time::operator>(const Time& rhs) const { return rhs < *this; }  // swapped
+
+bool Time::operator<=(const Time& rhs) const { return !(*this > rhs); }
+
+bool Time::operator>=(const Time& rhs) const { return !(*this < rhs); }
+
+// Flattens the four fields into one count of milliseconds.
+presentation_t Time::presentation() const {
+  const presentation_t from_hours = 1000LL * 3600LL * presentation_t(hours);
+  const presentation_t from_minutes = 1000LL * 60LL * presentation_t(minutes);
+  const presentation_t from_seconds = 1000LL * presentation_t(seconds);
+  return from_hours + from_minutes + from_seconds + milliseconds;
+}
+
+// Sets this time from |d|, a count of milliseconds, splitting it into
+// hours, minutes, seconds and milliseconds.  A negative |d| is treated
+// as an error and resets all four fields to 0.  Returns *this.
+Time& Time::presentation(presentation_t d) {
+  if (d < 0)  // error
+    d = 0;
+
+  // Split in presentation_t arithmetic: an int product such as
+  // 1000 * seconds overflows once |d| exceeds INT_MAX milliseconds
+  // (about 24.8 days when int is 32 bits wide).
+  const presentation_t total_seconds = d / 1000;
+  const presentation_t total_minutes = total_seconds / 60;
+  const presentation_t total_hours = total_minutes / 60;
+
+  milliseconds = static_cast<int>(d % 1000);
+  seconds = static_cast<int>(total_seconds % 60);
+  minutes = static_cast<int>(total_minutes % 60);
+  hours = static_cast<int>(total_hours);
+
+  return *this;
+}
+
+// Advances this time by |rhs| milliseconds (a negative |rhs| moves it
+// back).  A sum below zero ends up as time zero.
+Time& Time::operator+=(presentation_t rhs) {
+  const presentation_t shifted = presentation() + rhs;
+  return presentation(shifted);
+}
+
+// Returns a copy of this time advanced by |d| milliseconds.
+Time Time::operator+(presentation_t d) const {
+  Time sum(*this);
+  return sum += d;
+}
+
+Time& Time::operator-=(presentation_t d) { return *this += -d; }  // add -d
+
+// Returns the signed distance, in milliseconds, from |t| to this time;
+// the result is negative when this time is the earlier of the two.
+presentation_t Time::operator-(const Time& t) const {
+  const presentation_t minuend = presentation();
+  return minuend - t.presentation();
+}
+
+}  // namespace libwebvtt