scala/mail-parser/mail-parser.sc: email parser (based on java mail api); ammonite.io script
author Tomas Zeman <tzeman@volny.cz>
Thu, 03 May 2018 11:54:46 +0200
changeset 53 09b1d3c0aa20
parent 52 c0d94e64d89a
child 54 21fabe8ab141
scala/mail-parser/mail-parser.sc: email parser (based on java mail api); ammonite.io script
.hgignore
scala/mail-parser/mail-parser.sc
--- a/.hgignore	Tue Jan 09 12:28:15 2018 +0100
+++ b/.hgignore	Thu May 03 11:54:46 2018 +0200
@@ -3,3 +3,5 @@
 *~
 .*.swp
 .netrwhist
+.idea
+*.iml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/mail-parser/mail-parser.sc	Thu May 03 11:54:46 2018 +0200
@@ -0,0 +1,208 @@
+#!/usr/bin/env amm
+
+import java.io.{ByteArrayInputStream, File, InputStream}
+import java.nio.file.{Files, StandardCopyOption}
+import java.text.SimpleDateFormat
+import java.util.Date
+
+import ammonite.main.Router.{doc, main}
+import ammonite.ops._
+import upickle.default
+
+//import $ivy.`javax.mail:javax.mail-api:1.6.1`
+//import $ivy.`com.sun.mail:mailapi:1.6.1`
+import $ivy.`javax.mail:javax.mail-api:1.6.1`
+import $ivy.`com.sun.mail:mailapi:1.6.1`
+import scala.language.postfixOps
+import javax.mail.internet._
+import javax.mail._
+import javax.mail.{Address => mAddress}
+import javax.mail.internet.{ContentType => mContentType}
+
+import scala.collection.JavaConverters._
+
+/** A single e-mail address taken from a message.
+  *
+  * @param email address string, from `InternetAddress.getAddress`
+  * @param name  personal name, from `InternetAddress.getPersonal`; `None` when that is `null`
+  * @param type  address type string, from `getType`
+  */
+case class Address(
+  email: String,
+  name: Option[String],
+  `type`: String
+)
+
+object Address {
+  /** Converts a Java Mail address.
+    *
+    * @param a address to convert
+    * @return the converted address, or `None` when `a` is not an `InternetAddress`
+    */
+  def apply(a: mAddress): Option[Address] =
+    Option(a) collect {
+      case inet: InternetAddress =>
+        Address(inet.getAddress, Option(inet.getPersonal), inet.getType)
+    }
+}
+
+/** Scala-side counterpart of the `Flags.Flag` constants handled below.
+  *
+  * @param code label of the flag; a plain constructor parameter, not exposed as a member
+  */
+sealed abstract class SystemFlag(code: String)
+
+object SystemFlag {
+  case object ANSWERED extends SystemFlag("Answered")
+  case object DELETED extends SystemFlag("Deleted")
+  case object DRAFT extends SystemFlag("Draft")
+  case object FLAGGED extends SystemFlag("Flagged")
+  case object RECENT extends SystemFlag("Recent")
+  case object SEEN extends SystemFlag("Seen")
+  case object USER extends SystemFlag("User")
+
+  /** Maps a Java Mail flag onto the matching case object.
+    *
+    * @param fl flag to translate
+    * @return the matching case object, or `None` for any other value
+    */
+  def apply(fl: Flags.Flag): Option[SystemFlag] =
+    Option(fl) collect {
+      case Flags.Flag.ANSWERED => ANSWERED
+      case Flags.Flag.DELETED => DELETED
+      case Flags.Flag.DRAFT => DRAFT
+      case Flags.Flag.FLAGGED => FLAGGED
+      case Flags.Flag.RECENT => RECENT
+      case Flags.Flag.SEEN => SEEN
+      case Flags.Flag.USER => USER
+    }
+}
+
+/** One header of a message or body part, kept as a name/value pair. */
+case class Header(
+  name: String,
+  value: String
+)
+
+/** Header-level information extracted from a message.
+  *
+  * @param from        addresses from `Message.getFrom`
+  * @param replyTo     addresses from `Message.getReplyTo`
+  * @param to          recipients of type `TO`
+  * @param cc          recipients of type `CC`
+  * @param subject     decoded subject; `None` when the message has no subject
+  * @param received    value of `Message.getReceivedDate`, if any
+  * @param sent        value of `Message.getSentDate`, if any
+  * @param systemFlags system flags that [[SystemFlag]] recognises
+  * @param userFlags   user flags of the message
+  * @param headers     all headers of the message
+  * @param encoding    `MimeMessage.getEncoding`; `None` for non-MIME messages
+  * @param messageId   `MimeMessage.getMessageID`; `None` for non-MIME messages
+  */
+case class Envelope(
+  from: List[Address],
+  replyTo: List[Address],
+  to: List[Address],
+  cc: List[Address],
+  subject: Option[String],
+  received: Option[Date],
+  sent: Option[Date],
+  systemFlags: List[SystemFlag],
+  userFlags: List[String],
+  headers: List[Header],
+  encoding: Option[String],
+  messageId: Option[String]
+)
+
+object Envelope {
+  /** Turns a possibly-`null` address array into a list (`null` becomes empty). */
+  private def safe(v: Array[mAddress]): List[mAddress] =
+    Option(v).fold(List.empty[mAddress])(_.toList)
+
+  /** Builds the envelope of a message.
+    *
+    * @param m message to describe
+    * @return envelope with every `null` getter result mapped to `None` or `Nil`
+    */
+  def apply(m: Message): Envelope = {
+    // encoding and message id only exist on MIME messages
+    val mime = m match {
+      case mm: MimeMessage => Some(mm)
+      case _ => None
+    }
+    val flags = Option(m.getFlags).toList
+    Envelope(
+      safe(m.getFrom).flatMap(Address(_)),
+      safe(m.getReplyTo).flatMap(Address(_)),
+      safe(m.getRecipients(Message.RecipientType.TO)).flatMap(Address(_)),
+      safe(m.getRecipients(Message.RecipientType.CC)).flatMap(Address(_)),
+      // wrap before decoding: a missing subject is null and must not reach decodeText
+      Option(m.getSubject).map(s => MimeUtility.decodeText(s)),
+      Option(m.getReceivedDate),
+      Option(m.getSentDate),
+      flags.flatMap(_.getSystemFlags).flatMap(SystemFlag(_)),
+      flags.flatMap(_.getUserFlags),
+      m.getAllHeaders.asScala.map(h => Header(h.getName, h.getValue)).toList,
+      mime.flatMap(v => Option(v.getEncoding)),
+      mime.flatMap(v => Option(v.getMessageID))
+    )
+  }
+}
+
+/** Parsed form of a Content-Type header value.
+  *
+  * @param primaryType primary type, from `getPrimaryType`
+  * @param subType     sub type, from `getSubType`
+  * @param charset     value of the `charset` parameter, if present
+  * @param parameters  all parameters of the header, `charset` included
+  */
+case class ContentType(
+  primaryType: String,
+  subType: String,
+  charset: Option[String],
+  parameters: List[ContentType.Parameter]
+)
+
+object ContentType {
+  /** A single `name=value` parameter of a Content-Type header. */
+  case class Parameter(name: String, value: String)
+
+  /** Parses a Content-Type header value with the Java Mail parser.
+    *
+    * @param s raw header value
+    * @return the parsed content type; a header without parameters gives an
+    *         empty `parameters` list and no `charset`
+    */
+  def apply(s: String): ContentType = {
+    val ct = new mContentType(s)
+    // getParameterList is null when the header carries no parameters at all
+    val pl = Option(ct.getParameterList)
+    val params = for {
+      list <- pl.toList
+      n <- list.getNames.asScala
+    } yield Parameter(n, list.get(n))
+    ContentType(ct.getPrimaryType, ct.getSubType,
+      pl.flatMap(list => Option(list.get("charset"))),
+      params)
+  }
+}
+
+/** Metadata of a single message part.
+  *
+  * @param contentType parsed content type of the part
+  * @param size        `Part.getSize`, unless it is -1
+  * @param lineCount   `Part.getLineCount`, unless it is -1
+  * @param disposition `Part.getDisposition`, if not `null`
+  * @param description `Part.getDescription`, if not `null`
+  * @param fileName    `Part.getFileName`, if not `null`
+  * @param headers     all headers of the part
+  * @param encoding    `MimePart.getEncoding`; `None` for parts that are not MIME parts
+  */
+case class Descriptor(
+  contentType: ContentType,
+  size: Option[Int],
+  lineCount: Option[Int],
+  disposition: Option[String],
+  description: Option[String],
+  fileName: Option[String],
+  headers: List[Header],
+  encoding: Option[String]
+)
+
+object Descriptor {
+  /** Collects the metadata of a part.
+    *
+    * @param p part to describe
+    * @return descriptor with `null` and -1 results mapped to `None`
+    */
+  def apply(p: Part): Descriptor = {
+    val parsedType = ContentType(p.getContentType)
+    // -1 is mapped to "no value" for both counters
+    val byteSize = Option(p.getSize) filter(_ != -1)
+    val lines = Option(p.getLineCount) filter(_ != -1)
+    val disposition = Option(p.getDisposition)
+    val description = Option(p.getDescription)
+    val fileName = Option(p.getFileName)
+    val partHeaders = for {
+      all <- Option(p.getAllHeaders).toList
+      h <- all.asScala
+    } yield Header(h.getName, h.getValue)
+    val transferEncoding = p match {
+      case mime: MimePart => Option(mime.getEncoding)
+      case _ => None
+    }
+    Descriptor(parsedType, byteSize, lines, disposition, description,
+      fileName, partHeaders, transferEncoding)
+  }
+}
+
+/** Body of a message part; every variant carries the part's [[Descriptor]]. */
+sealed abstract class Content { def descriptor: Descriptor }
+object Content {
+
+  case class Plain(text: String, descriptor: Descriptor) extends Content
+  case class Html(text: String, descriptor: Descriptor) extends Content
+  case class Text(text: String, descriptor: Descriptor) extends Content
+  case class Multi(parts: List[Content], descriptor: Descriptor) extends Content
+  case class Nested(message: Content, descriptor: Descriptor) extends Content
+  case class Stream(bytes: Array[Byte], descriptor: Descriptor) extends Content
+  case class Attachment(file: String, descriptor: Descriptor) extends Content
+  case class Unknown(clz: String, descriptor: Descriptor) extends Content
+
+  /** Creates a fresh `mail-*.att` temp file inside `dir`. */
+  private def newAttachmentFile(dir: File): File =
+    File.createTempFile("mail-", ".att", dir)
+
+  /** Converts a part, recursing into multipart and nested-message content.
+    *
+    * String content becomes `Plain`, `Html` or `Text` depending on the MIME
+    * type. `MimeBodyPart` and `InputStream` content is saved to a temp file
+    * in `attDir` and returned as `Attachment`. Any other content type is
+    * reported as `Unknown` with its class name.
+    *
+    * @param m      part to convert
+    * @param attDir directory that receives attachment files
+    * @return the content tree rooted at `m`
+    */
+  def apply(m: Part)(implicit attDir: File): Content = {
+    val d = Descriptor(m)
+    m.getContent match {
+      case t: String if m.isMimeType("text/plain") => Plain(t, d)
+      case t: String if m.isMimeType("text/html") => Html(t, d)
+      case t: String => Text(t, d)
+      case mp: Multipart if m.isMimeType("multipart/*") =>
+        Multi((0 until mp.getCount).map(i => Content(mp.getBodyPart(i))).toList,
+          d)
+      case p: Part if m.isMimeType("message/rfc822") =>
+        Nested(Content(p), d)
+      case p: MimeBodyPart =>
+        val f = newAttachmentFile(attDir)
+        p.saveFile(f)
+        Attachment(f.getAbsolutePath, d)
+      case is: InputStream =>
+        // Files.copy leaves the source stream open; close it on every path
+        try {
+          val f = newAttachmentFile(attDir)
+          Files.copy(is, f.toPath, StandardCopyOption.REPLACE_EXISTING)
+          Attachment(f.getAbsolutePath, d)
+        } finally is.close()
+      case x => Unknown(x.getClass.getName, d)
+    }
+  }
+}
+
+/** A parsed e-mail: its envelope plus its content tree. */
+case class EmailMessage(envelope: Envelope, content: Content)
+
+object EmailMessage {
+  /** Parses a message into envelope and content.
+    *
+    * @param m      message to parse
+    * @param attDir directory handed on to [[Content]] for attachment files
+    * @return the parsed message
+    */
+  def apply(m: Message)(implicit attDir: File): EmailMessage = {
+    val env = Envelope(m)
+    val body = Content(m)
+    EmailMessage(env, body)
+  }
+}
+
+// Script entry point: parses the message file `mail`, stores attachments in
+// `outDir` and writes the JSON form of the parsed message to `outDir/mail`.
+@main
+def main(
+  @doc("Email message file") mail: Path,
+  @doc("Output directory") outDir: Path): Unit = {
+
+  // Keep this before the upickle import: after it, unqualified `write` means
+  // upickle's (hence the fully qualified ammonite call further down); `read`
+  // is presumably shadowed the same way.
+  val mailSession = Session.getInstance(System.getProperties)
+  val mime = new MimeMessage(mailSession,
+    new ByteArrayInputStream(read.bytes(mail)))
+
+  import upickle.default._
+  import upickle.Js
+
+  // dates are serialised as strings in this pattern
+  val stampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ")
+  implicit val dateWriter: default.Writer[Date] =
+    Writer[Date](d => Js.Str(stampFormat.format(d)))
+
+  val targetDir = outDir.toIO
+  targetDir.mkdirs()
+  val parsed = EmailMessage(mime)(targetDir)
+  ammonite.ops.write.over(outDir / 'mail, write(parsed))
+  println(s"Written to $outDir")
+}