clojurecup2015-mailhead/src/mailhead/parser.clj

42 lines
1.5 KiB
Clojure
Raw Normal View History

2015-12-05 11:32:31 +01:00
(ns mailhead.parser
2015-12-05 14:17:05 +01:00
(:require [clojure.string :as st]
[clj-time.core :as time]
[clj-time.coerce :as timec])
2015-12-05 11:32:31 +01:00
(:use [clojure.contrib.java-utils :only [as-properties]]
2015-12-05 14:17:05 +01:00
[clojure.java.io :only [input-stream file]])
2015-12-05 11:32:31 +01:00
(:import
(javax.mail Session)
(javax.mail.internet MimeMessage)
(javax.mail.internet InternetHeaders)))
(def session
  "Default JavaMail `Session` used for every `MimeMessage` built in this
  namespace. Its properties set `mail.store.protocol` to `imaps`."
  (let [props (as-properties [["mail.store.protocol" "imaps"]])]
    (Session/getDefaultInstance props)))
(defn all-headers
  "Returns a seq over the enumeration stored under the `allHeaders` bean
  property of `message`."
  [message]
  (-> message
      bean
      :allHeaders
      enumeration-seq))
2015-12-05 11:32:31 +01:00
2015-12-05 14:17:05 +01:00
(defn received-headers
  "Returns the values of every header in `all-headers` whose name is
  `Received`, in their original order.

  `all-headers` is a seq of objects exposing `getName` and `getValue`.
  The name is matched case-insensitively, because mail header field names
  are not case-sensitive (`received:` and `RECEIVED:` are the same field)."
  [all-headers]
  (->> all-headers
       ;; `str` turns a nil name into "" so the comparison never throws.
       (filter (fn [header]
                 (.equalsIgnoreCase "Received" (str (.getName header)))))
       (map (fn [header] (.getValue header)))))
(defn parse-received-headers
  "Parses the value of a single `Received` header into a map.

  Keys of the returned map:
    :sender   - the space-delimited token following `from`, or nil when
                the header has no such token
    :receiver - the space-delimited token following `by`, or nil when
                the header has no such token
    :time     - the result of `clj-time.coerce/from-string` applied to the
                cleaned-up text that follows the last `;` in the header"
  [received-header]
  (let [;; The timestamp is whatever follows the last ';'.
        ;; Uses the `st` alias declared in the ns form (there is no `str` alias).
        datetime (st/trim (last (st/split received-header #";")))
        ;; Hacky hacky, but no time to make this proper ...
        ;; Strip parenthesised comments such as "(CET)", then collapse any
        ;; run of whitespace into one space so the date parser sees a
        ;; single-spaced string.
        cleaned-date (-> datetime
                         (st/replace #"\([^)]*\)" " ")
                         (st/replace #"\s+" " ")
                         st/trim)]
    ;; `re-find` yields [whole-match group-1] or nil; `nth` on nil gives nil.
    {:sender (nth (re-find #"\s*from ([^ ]+)" received-header) 1)
     :receiver (nth (re-find #"\s*by ([^ ]+)" received-header) 1)
     :time (timec/from-string cleaned-date)}))
(defn extract-relevant-stuff
  "Takes a mime message, pulls out all of its headers, keeps the values of
  the `Received` ones and returns a lazy seq with each value run through
  `parse-received-headers`."
  [mime-message]
  (let [headers (all-headers mime-message)
        received (received-headers headers)]
    (map parse-received-headers received)))
2015-12-05 11:32:31 +01:00
(defn parse-from-string
  "Builds a `MimeMessage` from the raw email text in `email-string` and
  returns the result of `extract-relevant-stuff` for it."
  [email-string]
  (let [;; No-arg `getBytes` encodes with the platform default charset.
        raw-bytes (.getBytes email-string)
        message (MimeMessage. session (java.io.ByteArrayInputStream. raw-bytes))]
    (extract-relevant-stuff message)))
2015-12-05 11:32:31 +01:00
(defn parse-from-file
  "Builds a `MimeMessage` from the file named by `filename` (anything
  `clojure.java.io/input-stream` accepts) and returns the result of
  `extract-relevant-stuff` for it.

  The underlying stream is closed as soon as the message has been
  constructed, so no file handle is leaked."
  [filename]
  ;; NOTE(review): relies on the MimeMessage constructor consuming the stream
  ;; while it is still open; only headers are used afterwards.
  (let [message (with-open [in (input-stream filename)]
                  (MimeMessage. session in))]
    (extract-relevant-stuff message)))