(ns mailhead.parser
  (:require [clojure.string :as st]
            [clj-time.core :as time]
            [clj-time.coerce :as timec])
  (:use [clojure.contrib.java-utils :only [as-properties]]
        [clojure.java.io :only [input-stream file]])
  (:import (javax.mail Session)
           (javax.mail.internet MimeMessage)
           (javax.mail.internet InternetHeaders)))

(def ^{:doc "Shared JavaMail session handed to every MimeMessage built in this
  namespace. Created with the single property mail.store.protocol=imaps."}
  session
  (Session/getDefaultInstance
   (as-properties [["mail.store.protocol" "imaps"]])))

(defn all-headers
  "Returns a seq of every header object of `message`, in the order the
  message's getAllHeaders enumeration yields them."
  [message]
  (enumeration-seq (.getAllHeaders message)))

(defn received-headers
  "Given a seq of header objects, returns the values (strings) of those whose
  name is \"Received\". The name is compared case-insensitively because mail
  header field names are case-insensitive."
  [all-headers]
  (->> all-headers
       (filter (fn [header]
                 (.equalsIgnoreCase "Received" (.getName header))))
       (map #(.getValue %))))

(defn parse-received-headers
  "Parses the value of a single Received header into a map:

    :sender   - the token following \"from \", or nil if there is none
    :receiver - the token following \"by \", or nil if there is none
    :time     - result of clj-time's `from-string` on the cleaned-up text
                after the last \";\" of the header"
  [received-header]
  (let [;; The timestamp is whatever follows the last ';' in the header.
        datetime (st/trim (last (st/split received-header #";")))
        ;; Hacky, but good enough for now: blank out a parenthesised
        ;; comment such as "(UTC)" so the date parser only sees the
        ;; timestamp itself. The match is greedy, so everything from the
        ;; first "(" to the last ")" is replaced.
        cleaned-date (st/trim (st/replace datetime #"(\(.*\)| )" " "))]
    {:sender   (second (re-find #"\s*from ([^ ]+)" received-header))
     :receiver (second (re-find #"\s*by ([^ ]+)" received-header))
     :time     (timec/from-string cleaned-date)}))

(defn extract-relevant-stuff
  "Returns a lazy seq with one parsed map (see `parse-received-headers`) per
  Received header of `mime-message`, in header order."
  [mime-message]
  (->> mime-message
       all-headers
       received-headers
       (map parse-received-headers)))

(defn parse-from-string
  "Parses `email-string` (a raw message, headers included) and returns the
  parsed Received headers. The string is converted to bytes with the
  platform default charset."
  [email-string]
  (let [email-stream (java.io.ByteArrayInputStream. (.getBytes email-string))]
    (extract-relevant-stuff (MimeMessage. session email-stream))))

(defn parse-from-file
  "Reads the raw message stored at `filename` (anything accepted by
  clojure.java.io/input-stream) and returns the parsed Received headers."
  [filename]
  ;; The MimeMessage constructor reads and parses the stream before it
  ;; returns, so the stream can be closed as soon as the message exists;
  ;; the lazy result only touches the already-parsed headers.
  (with-open [stream (input-stream filename)]
    (extract-relevant-stuff (MimeMessage. session stream))))