scala/mail-parser/mail-parser.sc
author Tomas Zeman <tzeman@volny.cz>
Thu, 03 May 2018 11:54:46 +0200
changeset 53 09b1d3c0aa20
permissions -rwxr-xr-x
scala/mail-parser/mail-parser.sc: email parser (based on java mail api); ammonite.io script

#!/usr/bin/env amm

import java.io.{ByteArrayInputStream, File, InputStream}
import java.nio.file.{Files, StandardCopyOption}
import java.text.SimpleDateFormat
import java.util.Date

import ammonite.main.Router.{doc, main}
import ammonite.ops._
import upickle.default

//import $ivy.`javax.mail:javax.mail-api:1.6.1`
//import $ivy.`com.sun.mail:mailapi:1.6.1`
import $ivy.`javax.mail:javax.mail-api:1.6.1`
import $ivy.`com.sun.mail:mailapi:1.6.1`
import scala.language.postfixOps
import javax.mail.internet._
import javax.mail._
import javax.mail.{Address => mAddress}
import javax.mail.internet.{ContentType => mContentType}

import scala.collection.JavaConverters._

/**
 * A single mail address in serializable form.
 *
 * @param email the address part, taken from `InternetAddress.getAddress`
 * @param name  the personal (display) name, when the address carries one
 * @param type  the type string reported by the underlying mail address
 */
case class Address(email: String, name: Option[String], `type`: String)

object Address {
  /**
   * Converts a mail API address into an [[Address]].
   *
   * Only `InternetAddress` instances are converted; every other address
   * implementation yields `None`.
   */
  def apply(a: mAddress): Option[Address] =
    PartialFunction.condOpt(a) {
      case inet: InternetAddress =>
        Address(inet.getAddress, Option(inet.getPersonal), inet.getType)
    }
}

/** Closed set of message flags that mirror the constants of `Flags.Flag`. */
sealed abstract class SystemFlag(code: String)

object SystemFlag {
  case object ANSWERED extends SystemFlag("Answered")
  case object DELETED extends SystemFlag("Deleted")
  case object DRAFT extends SystemFlag("Draft")
  case object FLAGGED extends SystemFlag("Flagged")
  case object RECENT extends SystemFlag("Recent")
  case object SEEN extends SystemFlag("Seen")
  case object USER extends SystemFlag("User")

  // Lookup table from the mail API constant to its counterpart above.
  private val byMailFlag: List[(Flags.Flag, SystemFlag)] = List(
    Flags.Flag.ANSWERED -> ANSWERED,
    Flags.Flag.DELETED -> DELETED,
    Flags.Flag.DRAFT -> DRAFT,
    Flags.Flag.FLAGGED -> FLAGGED,
    Flags.Flag.RECENT -> RECENT,
    Flags.Flag.SEEN -> SEEN,
    Flags.Flag.USER -> USER
  )

  /**
   * Translates a mail API flag into a [[SystemFlag]].
   *
   * @return the matching flag, or `None` when `fl` equals none of the
   *         constants listed in the table
   */
  def apply(fl: Flags.Flag): Option[SystemFlag] =
    byMailFlag.collectFirst { case (known, mapped) if known == fl => mapped }
}

/**
 * One message or part header as a plain name/value pair.
 *
 * @param name  the header name
 * @param value the header value
 */
case class Header(
  name: String,
  value: String
)

/**
 * Message-level metadata extracted from a mail `Message`.
 *
 * @param from        senders (only addresses convertible to [[Address]])
 * @param replyTo     reply-to addresses
 * @param to          primary recipients
 * @param cc          carbon-copy recipients
 * @param subject     decoded subject, `None` when the message has no subject
 * @param received    received date, if the message reports one
 * @param sent        sent date, if the message reports one
 * @param systemFlags flags that map onto a [[SystemFlag]]
 * @param userFlags   user-defined flag names
 * @param headers     all headers of the message
 * @param encoding    transfer encoding (only available for `MimeMessage`)
 * @param messageId   message id (only available for `MimeMessage`)
 */
case class Envelope(
  from: List[Address],
  replyTo: List[Address],
  to: List[Address],
  cc: List[Address],
  subject: Option[String],
  received: Option[Date],
  sent: Option[Date],
  systemFlags: List[SystemFlag],
  userFlags: List[String],
  headers: List[Header],
  encoding: Option[String],
  messageId: Option[String]
)

object Envelope {
  /** Turns a possibly-null address array from the mail API into a list. */
  private def safe(v: Array[mAddress]): List[mAddress] =
    Option(v).fold(List.empty[mAddress])(_.toList)

  /**
   * Builds the envelope of `m`.
   *
   * Every getter that may return `null` is wrapped in `Option`, so a message
   * lacking a header (for example one without a subject) produces an empty
   * field instead of a `NullPointerException`.
   */
  def apply(m: Message): Envelope = {
    // Encoding and message id exist only on the MIME flavour of Message.
    val mime: Option[MimeMessage] = m match {
      case mm: MimeMessage => Some(mm)
      case _ => None
    }

    def addresses(raw: Array[mAddress]): List[Address] =
      safe(raw).flatMap(Address(_))

    val from = addresses(m.getFrom)
    val replyTo = addresses(m.getReplyTo)
    val to = addresses(m.getRecipients(Message.RecipientType.TO))
    val cc = addresses(m.getRecipients(Message.RecipientType.CC))
    // decodeText dereferences its argument, so only decode a present subject.
    val subject = Option(m.getSubject).map(s => MimeUtility.decodeText(s))
    val received = Option(m.getReceivedDate)
    val sent = Option(m.getSentDate)
    val flags = Option(m.getFlags).toList
    val systemFlags = flags.flatMap(_.getSystemFlags.toList).flatMap(SystemFlag(_))
    val userFlags = flags.flatMap(_.getUserFlags.toList)
    val headers = Option(m.getAllHeaders).toList
      .flatMap(_.asScala)
      .map(h => Header(h.getName, h.getValue))

    Envelope(
      from,
      replyTo,
      to,
      cc,
      subject,
      received,
      sent,
      systemFlags,
      userFlags,
      headers,
      mime.flatMap(v => Option(v.getEncoding)),
      mime.flatMap(v => Option(v.getMessageID))
    )
  }
}

/**
 * Parsed form of a Content-Type header value.
 *
 * @param primaryType the primary type, e.g. `text`
 * @param subType     the sub type, e.g. `plain`
 * @param charset     value of the `charset` parameter, if present
 * @param parameters  all parameters of the header (empty when there are none)
 */
case class ContentType(
  primaryType: String,
  subType: String,
  charset: Option[String],
  parameters: List[ContentType.Parameter]
)

object ContentType {
  /** A single `name=value` parameter of a Content-Type header. */
  case class Parameter(name: String, value: String)

  /**
   * Parses a raw Content-Type header value.
   *
   * The mail API hands back a `null` parameter list for a header without
   * parameters (such as a bare `text/plain`), so the list is wrapped in
   * `Option` and the charset is read through the null-safe `getParameter`.
   *
   * @param s the raw header value
   */
  def apply(s: String): ContentType = {
    val parsed = new mContentType(s)
    val parameters = Option(parsed.getParameterList).toList.flatMap { pl =>
      pl.getNames.asScala.map(n => Parameter(n, pl.get(n))).toList
    }
    ContentType(
      parsed.getPrimaryType,
      parsed.getSubType,
      Option(parsed.getParameter("charset")),
      parameters
    )
  }
}

/**
 * Metadata describing one part of a message.
 *
 * @param contentType parsed Content-Type of the part
 * @param size        reported size, absent when the part reports -1
 * @param lineCount   reported line count, absent when the part reports -1
 * @param disposition disposition of the part, if any
 * @param description description of the part, if any
 * @param fileName    file name of the part, if any
 * @param headers     all headers of the part
 * @param encoding    transfer encoding (only available for `MimePart`)
 */
case class Descriptor(
  contentType: ContentType,
  size: Option[Int],
  lineCount: Option[Int],
  disposition: Option[String],
  description: Option[String],
  fileName: Option[String],
  headers: List[Header],
  encoding: Option[String]
)

object Descriptor {
  /** Collects the descriptive attributes of `p` into a [[Descriptor]]. */
  def apply(p: Part): Descriptor = {
    // A reported value of -1 is dropped and represented as None.
    def unlessMinusOne(n: Int): Option[Int] = Some(n).filter(_ != -1)

    val contentType = ContentType(p.getContentType)
    val size = unlessMinusOne(p.getSize)
    val lineCount = unlessMinusOne(p.getLineCount)
    val disposition = Option(p.getDisposition)
    val description = Option(p.getDescription)
    val fileName = Option(p.getFileName)
    val headers = Option(p.getAllHeaders) match {
      case Some(all) => all.asScala.map(h => Header(h.getName, h.getValue)).toList
      case None => Nil
    }
    // The transfer encoding is exposed only by the MIME flavour of Part.
    val encoding = p match {
      case mime: MimePart => Option(mime.getEncoding)
      case _ => None
    }

    Descriptor(contentType, size, lineCount, disposition, description,
      fileName, headers, encoding)
  }
}

/** Body of a message part; every variant carries the part's [[Descriptor]]. */
sealed abstract class Content { def descriptor: Descriptor }

object Content {

  /** Textual body of a `text/plain` part. */
  case class Plain(text: String, descriptor: Descriptor) extends Content
  /** Textual body of a `text/html` part. */
  case class Html(text: String, descriptor: Descriptor) extends Content
  /** Textual body of any other part whose content is a string. */
  case class Text(text: String, descriptor: Descriptor) extends Content
  /** A multipart container with its converted sub-parts, in order. */
  case class Multi(parts: List[Content], descriptor: Descriptor) extends Content
  /** An embedded `message/rfc822` message. */
  case class Nested(message: Content, descriptor: Descriptor) extends Content
  /** Raw bytes of a part; not produced by `apply` below. */
  case class Stream(bytes: Array[Byte], descriptor: Descriptor) extends Content
  /** Content saved to a file; `file` is the absolute path of that file. */
  case class Attachment(file: String, descriptor: Descriptor) extends Content
  /** Content of a class `apply` has no handling for; `clz` is its class name. */
  case class Unknown(clz: String, descriptor: Descriptor) extends Content

  /**
   * Converts a mail part (recursively, for multipart and nested messages)
   * into a [[Content]] tree.
   *
   * Binary content is stored in a freshly created `mail-*.att` file inside
   * `attDir`, and the resulting [[Attachment]] refers to its absolute path.
   *
   * @param m      the part to convert
   * @param attDir directory that receives the attachment files
   */
  def apply(m: Part)(implicit attDir: File): Content = {
    val d = Descriptor(m)

    def newAttachmentFile(): File = File.createTempFile("mail-", ".att", attDir)

    m.getContent match {
      case t: String if m.isMimeType("text/plain") => Plain(t, d)
      case t: String if m.isMimeType("text/html") => Html(t, d)
      case t: String => Text(t, d)
      case mp: Multipart if m.isMimeType("multipart/*") =>
        val parts = (0 until mp.getCount).map(i => Content(mp.getBodyPart(i))).toList
        Multi(parts, d)
      case p: Part if m.isMimeType("message/rfc822") =>
        Nested(Content(p), d)
      case p: MimeBodyPart =>
        val f = newAttachmentFile()
        p.saveFile(f)
        Attachment(f.getAbsolutePath, d)
      case is: InputStream =>
        val f = newAttachmentFile()
        // Files.copy leaves the source stream open; release it even on failure.
        try Files.copy(is, f.toPath, StandardCopyOption.REPLACE_EXISTING)
        finally is.close()
        Attachment(f.getAbsolutePath, d)
      case x => Unknown(x.getClass.getName, d)
    }
  }
}

/**
 * A fully converted mail message.
 *
 * @param envelope message-level metadata
 * @param content  the converted body of the message
 */
case class EmailMessage(envelope: Envelope, content: Content)

object EmailMessage {
  /**
   * Converts a mail API message: the envelope is built first, then the body.
   *
   * @param m      the message to convert
   * @param attDir directory handed on to [[Content]] for attachment files
   */
  def apply(m: Message)(implicit attDir: File): EmailMessage = {
    val envelope = Envelope(m)
    val content = Content(m)
    EmailMessage(envelope, content)
  }
}

/**
 * Script entry point: parses one e-mail file and writes its serialized form
 * to `outDir / "mail"`; attachment files are placed in `outDir` as well.
 *
 * @param mail   path of the e-mail message file to parse
 * @param outDir directory that receives the output (created if missing)
 */
@main
def main(
  @doc("Email message file") mail: Path,
  @doc("Output directory") outDir: Path): Unit = {

  // The whole message file is read into memory and parsed as a MIME message.
  val session = Session.getInstance(System.getProperties)
  val msg = new MimeMessage(session, new ByteArrayInputStream(read.bytes(mail)))
  // NOTE(review): this local wildcard import presumably makes the bare
  // `write(m)` below resolve to upickle's serializer rather than
  // ammonite.ops.write - confirm before reordering imports.
  import upickle.default._
  import upickle.Js
  // Dates are serialized as strings in the pattern below.
  val df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ")
  implicit val w: default.Writer[Date] = Writer[Date](d => Js.Str(df.format(d)))
  outDir.toIO.mkdirs()
  // The output directory is passed explicitly as the attachment directory.
  val m = EmailMessage(msg)(outDir.toIO)
  // write.over replaces any existing file at outDir/mail.
  ammonite.ops.write.over(outDir / 'mail, write(m))
  println(s"Written to $outDir")
}