scala/mail-parser/mail-parser.sc: email parser (based on java mail api); ammonite.io script
#!/usr/bin/env amm
import java.io.{ByteArrayInputStream, File, InputStream}
import java.nio.file.{Files, StandardCopyOption}
import java.text.SimpleDateFormat
import java.util.Date
import ammonite.main.Router.{doc, main}
import ammonite.ops._
import upickle.default
//import $ivy.`javax.mail:javax.mail-api:1.6.1`
//import $ivy.`com.sun.mail:mailapi:1.6.1`
import $ivy.`javax.mail:javax.mail-api:1.6.1`
import $ivy.`com.sun.mail:mailapi:1.6.1`
import scala.language.postfixOps
import javax.mail.internet._
import javax.mail._
import javax.mail.{Address => mAddress}
import javax.mail.internet.{ContentType => mContentType}
import scala.collection.JavaConverters._
/** An e-mail address extracted from a JavaMail address.
  *
  * @param email  the address as returned by `InternetAddress.getAddress`
  * @param name   the personal name, when the address carries one
  * @param `type` the address type as reported by `getType`
  */
case class Address(email: String, name: Option[String], `type`: String)

object Address {

  /** Converts a JavaMail address; only `InternetAddress` instances are
    * supported, every other kind of address yields `None`.
    */
  def apply(a: mAddress): Option[Address] =
    Option(a) collect {
      case inet: InternetAddress =>
        new Address(inet.getAddress, Option(inet.getPersonal), inet.getType)
    }
}
/** Script-side mirror of the predefined `javax.mail.Flags.Flag` constants. */
sealed abstract class SystemFlag(code: String)

object SystemFlag {
  case object ANSWERED extends SystemFlag("Answered")
  case object DELETED extends SystemFlag("Deleted")
  case object DRAFT extends SystemFlag("Draft")
  case object FLAGGED extends SystemFlag("Flagged")
  case object RECENT extends SystemFlag("Recent")
  case object SEEN extends SystemFlag("Seen")
  case object USER extends SystemFlag("User")

  // Lookup table pairing each JavaMail flag constant with its mirror object.
  private val byMailFlag: List[(Flags.Flag, SystemFlag)] = List(
    Flags.Flag.ANSWERED -> ANSWERED,
    Flags.Flag.DELETED -> DELETED,
    Flags.Flag.DRAFT -> DRAFT,
    Flags.Flag.FLAGGED -> FLAGGED,
    Flags.Flag.RECENT -> RECENT,
    Flags.Flag.SEEN -> SEEN,
    Flags.Flag.USER -> USER
  )

  /** Maps a JavaMail flag to its mirror; flags that are not one of the
    * listed constants yield `None`.
    */
  def apply(fl: Flags.Flag): Option[SystemFlag] =
    byMailFlag.collectFirst { case (mailFlag, mirrored) if mailFlag == fl => mirrored }
}
/** A single header line as a name/value pair. */
case class Header(
  name: String,
  value: String
)
/** Message-level metadata extracted from a JavaMail `Message`.
  *
  * Address lists only contain addresses that [[Address.apply]] could
  * convert. `encoding` and `messageId` are only populated when the source
  * message is a `MimeMessage`.
  */
case class Envelope(
  from: List[Address],
  replyTo: List[Address],
  to: List[Address],
  cc: List[Address],
  subject: Option[String],
  received: Option[Date],
  sent: Option[Date],
  systemFlags: List[SystemFlag],
  userFlags: List[String],
  headers: List[Header],
  encoding: Option[String],
  messageId: Option[String]
)

object Envelope {

  // JavaMail returns null instead of an empty array when a header is absent.
  private def safe(v: Array[mAddress]): List[mAddress] =
    if (v == null) Nil else v.toList

  // Decodes RFC 2047 encoded words in the subject. When the charset is not
  // supported the undecoded text is kept so one bad header does not abort
  // the whole parse.
  private def decodeSubject(raw: String): String =
    try MimeUtility.decodeText(raw)
    catch { case _: java.io.UnsupportedEncodingException => raw }

  /** Builds the envelope of `m`.
    *
    * A message without a Subject header yields `subject = None`; the
    * subject is only decoded when it is present, since
    * `MimeUtility.decodeText` cannot take a null argument.
    */
  def apply(m: Message): Envelope = {
    val mime: Option[MimeMessage] = m match {
      case mm: MimeMessage => Some(mm)
      case _ => None
    }
    new Envelope(
      from = safe(m.getFrom).flatMap(Address(_)),
      replyTo = safe(m.getReplyTo).flatMap(Address(_)),
      to = safe(m.getRecipients(Message.RecipientType.TO)).flatMap(Address(_)),
      cc = safe(m.getRecipients(Message.RecipientType.CC)).flatMap(Address(_)),
      subject = Option(m.getSubject).map(decodeSubject),
      received = Option(m.getReceivedDate),
      sent = Option(m.getSentDate),
      systemFlags = Option(m.getFlags).toList.flatMap(_.getSystemFlags).flatMap(SystemFlag(_)),
      userFlags = Option(m.getFlags).toList.flatMap(_.getUserFlags),
      headers = m.getAllHeaders.asScala.map(h => Header(h.getName, h.getValue)).toList,
      encoding = mime.flatMap(v => Option(v.getEncoding)),
      messageId = mime.flatMap(v => Option(v.getMessageID))
    )
  }
}
/** Parsed form of a Content-Type header value.
  *
  * @param primaryType the primary type, e.g. `text`
  * @param subType     the sub type, e.g. `plain`
  * @param charset     value of the `charset` parameter, when present
  * @param parameters  all parameters of the header (including `charset`)
  */
case class ContentType(
  primaryType: String,
  subType: String,
  charset: Option[String],
  parameters: List[ContentType.Parameter]
)

object ContentType {
  case class Parameter(name: String, value: String)

  /** Parses a Content-Type header value with JavaMail's parser.
    *
    * A value without any parameters (e.g. `text/plain`) yields
    * `charset = None` and an empty parameter list.
    */
  def apply(s: String): ContentType = {
    val parsed = new mContentType(s)
    // getParameterList returns null when the header carries no parameters,
    // so it must be wrapped before it is dereferenced.
    val params = Option(parsed.getParameterList)
    new ContentType(
      parsed.getPrimaryType,
      parsed.getSubType,
      params.flatMap(pl => Option(pl.get("charset"))),
      params.toList.flatMap(pl =>
        pl.getNames.asScala.map(n => Parameter(n, pl.get(n))).toList)
    )
  }
}
/** Metadata describing a single message part.
  *
  * `size` and `lineCount` are `None` when JavaMail reports `-1` for them;
  * `encoding` is only available for `MimePart` instances.
  */
case class Descriptor(
  contentType: ContentType,
  size: Option[Int],
  lineCount: Option[Int],
  disposition: Option[String],
  description: Option[String],
  fileName: Option[String],
  headers: List[Header],
  encoding: Option[String]
)

object Descriptor {

  /** Collects the descriptor of part `p`. The values are read in the same
    * order as the fields are declared.
    */
  def apply(p: Part): Descriptor = {
    val parsedType = ContentType(p.getContentType)
    val byteSize = Option(p.getSize).filterNot(_ == -1)
    val lines = Option(p.getLineCount).filterNot(_ == -1)
    val disposition = Option(p.getDisposition)
    val description = Option(p.getDescription)
    val fileName = Option(p.getFileName)
    val partHeaders = Option(p.getAllHeaders).toList
      .flatMap(_.asScala)
      .map(h => Header(h.getName, h.getValue))
    val transferEncoding = p match {
      case mime: MimePart => Option(mime.getEncoding)
      case _ => None
    }
    new Descriptor(
      parsedType,
      byteSize,
      lines,
      disposition,
      description,
      fileName,
      partHeaders,
      transferEncoding
    )
  }
}
/** Body of a message part together with the part's [[Descriptor]]. */
sealed abstract class Content { def descriptor: Descriptor }

object Content {
  /** String content of a `text/plain` part. */
  case class Plain(text: String, descriptor: Descriptor) extends Content
  /** String content of a `text/html` part. */
  case class Html(text: String, descriptor: Descriptor) extends Content
  /** String content of any other MIME type. */
  case class Text(text: String, descriptor: Descriptor) extends Content
  /** A `multipart/*` container with its converted body parts, in order. */
  case class Multi(parts: List[Content], descriptor: Descriptor) extends Content
  /** An embedded `message/rfc822` message. */
  case class Nested(message: Content, descriptor: Descriptor) extends Content
  /** Raw bytes; `apply` below never produces this variant. */
  case class Stream(bytes: Array[Byte], descriptor: Descriptor) extends Content
  /** Content saved to disk; `file` is the absolute path of the saved file. */
  case class Attachment(file: String, descriptor: Descriptor) extends Content
  /** Content object of an unsupported class; `clz` is its class name. */
  case class Unknown(clz: String, descriptor: Descriptor) extends Content

  /** Converts part `m` (recursively for multiparts and nested messages).
    *
    * Binary content is written to a fresh `mail-*.att` temp file inside
    * `attDir` and returned as an [[Attachment]].
    *
    * @param m      the part to convert
    * @param attDir directory that receives attachment files
    */
  def apply(m: Part)(implicit attDir: File): Content = {
    val d = Descriptor(m)
    m.getContent match {
      case t: String if m.isMimeType("text/plain") => Plain(t, d)
      case t: String if m.isMimeType("text/html") => Html(t, d)
      case t: String => Text(t, d)
      case mp: Multipart if m.isMimeType("multipart/*") =>
        val parts = (0 until mp.getCount).map(i => Content(mp.getBodyPart(i))).toList
        Multi(parts, d)
      case p: Part if m.isMimeType("message/rfc822") =>
        Nested(Content(p), d)
      case p: MimeBodyPart =>
        val f = File.createTempFile("mail-", ".att", attDir)
        p.saveFile(f)
        Attachment(f.getAbsolutePath, d)
      case is: InputStream =>
        val f = File.createTempFile("mail-", ".att", attDir)
        // Files.copy leaves the source stream open, so close it here even
        // when the copy fails.
        try Files.copy(is, f.toPath, StandardCopyOption.REPLACE_EXISTING)
        finally is.close()
        Attachment(f.getAbsolutePath, d)
      case x => Unknown(x.getClass.getName, d)
    }
  }
}
/** A fully converted e-mail: its envelope metadata plus its content tree. */
case class EmailMessage(envelope: Envelope, content: Content)

object EmailMessage {

  /** Converts the JavaMail message `m`; attachment files found while
    * converting the content are written into `attDir`.
    */
  def apply(m: Message)(implicit attDir: File): EmailMessage = {
    val envelope = Envelope(m)
    val content = Content(m)
    new EmailMessage(envelope, content)
  }
}
/** Script entry point: parses the e-mail file `mail` and writes its JSON
  * representation to `outDir/mail`; attachments are saved into `outDir`.
  */
@main
def main(
    @doc("Email message file") mail: Path,
    @doc("Output directory") outDir: Path): Unit = {
  val mailSession = Session.getInstance(System.getProperties)
  // Read the file before upickle is imported below: that import brings its
  // own `read` into scope.
  val rawBytes = new ByteArrayInputStream(read.bytes(mail))
  val parsed = new MimeMessage(mailSession, rawBytes)

  import upickle.default._
  import upickle.Js

  // Dates are serialised as strings in this pattern.
  val dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ")
  implicit val dateWriter: default.Writer[Date] =
    Writer[Date](date => Js.Str(dateFormat.format(date)))

  val attachmentDir: File = outDir.toIO
  attachmentDir.mkdirs()

  val email = EmailMessage(parsed)(attachmentDir)
  val json = write(email)
  ammonite.ops.write.over(outDir / 'mail, json)
  println(s"Written to $outDir")
}