diff options
Diffstat (limited to 'java/com/google/gerrit/mail/RawMailParser.java')
-rw-r--r-- | java/com/google/gerrit/mail/RawMailParser.java | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/java/com/google/gerrit/mail/RawMailParser.java b/java/com/google/gerrit/mail/RawMailParser.java new file mode 100644 index 0000000000..b7e2030b2b --- /dev/null +++ b/java/com/google/gerrit/mail/RawMailParser.java @@ -0,0 +1,174 @@ +// Copyright (C) 2016 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.gerrit.mail; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.io.CharStreams; +import com.google.common.primitives.Ints; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import org.apache.james.mime4j.MimeException; +import org.apache.james.mime4j.dom.Entity; +import org.apache.james.mime4j.dom.Message; +import org.apache.james.mime4j.dom.MessageBuilder; +import org.apache.james.mime4j.dom.Multipart; +import org.apache.james.mime4j.dom.TextBody; +import org.apache.james.mime4j.dom.address.Mailbox; +import org.apache.james.mime4j.message.DefaultMessageBuilder; + +/** Parses raw email content received through POP3 or IMAP into an internal {@link MailMessage}. */ +public class RawMailParser { + private static final ImmutableSet<String> MAIN_HEADERS = + ImmutableSet.of("to", "from", "cc", "date", "message-id", "subject", "content-type"); + + private RawMailParser() {} + + /** + * Parses a MailMessage from a string. + * + * @param raw {@link String} payload as received over the wire + * @return parsed {@link MailMessage} + * @throws MailParsingException in case parsing fails + */ + public static MailMessage parse(String raw) throws MailParsingException { + MailMessage.Builder messageBuilder = MailMessage.builder(); + messageBuilder.rawContentUTF(raw); + Message mimeMessage; + try { + MessageBuilder builder = new DefaultMessageBuilder(); + mimeMessage = builder.parseMessage(new ByteArrayInputStream(raw.getBytes(UTF_8))); + } catch (IOException | MimeException e) { + throw new MailParsingException("Can't parse email", e); + } + // Add general headers + if (mimeMessage.getMessageId() != null) { + messageBuilder.id(mimeMessage.getMessageId()); + } + if (mimeMessage.getSubject() != null) { + messageBuilder.subject(mimeMessage.getSubject()); + } + if (mimeMessage.getDate() != null) { + messageBuilder.dateReceived(mimeMessage.getDate().toInstant()); + } + + // Add From, To and Cc + if (mimeMessage.getFrom() != null && mimeMessage.getFrom().size() > 0) { + Mailbox from = mimeMessage.getFrom().get(0); + messageBuilder.from(new Address(from.getName(), from.getAddress())); + } + if (mimeMessage.getTo() != null) { + for (Mailbox m : mimeMessage.getTo().flatten()) { + messageBuilder.addTo(new Address(m.getName(), m.getAddress())); + } + } + if (mimeMessage.getCc() != null) { + for (Mailbox m : mimeMessage.getCc().flatten()) { + messageBuilder.addCc(new Address(m.getName(), m.getAddress())); + } + } + + // Add additional headers + mimeMessage.getHeader().getFields().stream() + .filter(f -> !MAIN_HEADERS.contains(f.getName().toLowerCase())) + .forEach(f -> messageBuilder.addAdditionalHeader(f.getName() + ": " + f.getBody())); + + // Add text and html body parts + StringBuilder textBuilder = new StringBuilder(); + StringBuilder htmlBuilder = new StringBuilder(); + try { + handleMimePart(mimeMessage, textBuilder, htmlBuilder); + } catch (IOException e) { + throw new MailParsingException("Can't parse email", e); + } + messageBuilder.textContent(Strings.emptyToNull(textBuilder.toString())); + messageBuilder.htmlContent(Strings.emptyToNull(htmlBuilder.toString())); + + try { + // build() will only succeed if all required attributes were set. We wrap + // the IllegalStateException in a MailParsingException indicating that + // required attributes are missing, so that the caller doesn't fall over. + return messageBuilder.build(); + } catch (IllegalStateException e) { + throw new MailParsingException("Missing required attributes after email was parsed", e); + } + } + + /** + * Parses a MailMessage from an array of characters. Note that the character array is int-typed. + * This method is only used by POP3, which specifies that all transferred characters are US-ASCII + * (RFC 6856). When reading the input in Java, io.Reader yields ints. These can be safely + * converted to chars as all US-ASCII characters fit in a char. If emails contain non-ASCII + * characters, such as UTF runes, these will be encoded in ASCII using either Base64 or + * quoted-printable encoding. + * + * @param chars Array as received over the wire + * @return Parsed {@link MailMessage} + * @throws MailParsingException in case parsing fails + */ + public static MailMessage parse(int[] chars) throws MailParsingException { + StringBuilder b = new StringBuilder(chars.length); + for (int c : chars) { + b.append((char) c); + } + + MailMessage.Builder messageBuilder = parse(b.toString()).toBuilder(); + messageBuilder.rawContent(ImmutableList.copyOf(Ints.asList(chars))); + return messageBuilder.build(); + } + + /** + * Traverses a mime tree and parses out text and html parts. All other parts will be dropped. + * + * @param part {@code MimePart} to parse + * @param textBuilder {@link StringBuilder} to append all plaintext parts + * @param htmlBuilder {@link StringBuilder} to append all html parts + * @throws IOException in case of a failure while transforming the input to a {@link String} + */ + private static void handleMimePart( + Entity part, StringBuilder textBuilder, StringBuilder htmlBuilder) throws IOException { + if (isPlainOrHtml(part.getMimeType()) && !isAttachment(part.getDispositionType())) { + TextBody tb = (TextBody) part.getBody(); + String result = + CharStreams.toString(new InputStreamReader(tb.getInputStream(), tb.getMimeCharset())); + if (part.getMimeType().equals("text/plain")) { + textBuilder.append(result); + } else if (part.getMimeType().equals("text/html")) { + htmlBuilder.append(result); + } + } else if (isMultipart(part.getMimeType())) { + Multipart multipart = (Multipart) part.getBody(); + for (Entity e : multipart.getBodyParts()) { + handleMimePart(e, textBuilder, htmlBuilder); + } + } + } + + private static boolean isPlainOrHtml(String mimeType) { + return (mimeType.equals("text/plain") || mimeType.equals("text/html")); + } + + private static boolean isMultipart(String mimeType) { + return mimeType.startsWith("multipart/"); + } + + private static boolean isAttachment(String dispositionType) { + return dispositionType != null && dispositionType.equals("attachment"); + } +} |