From e1d8943c9651f72f7ee8aa129d9b250b35c86a08 Mon Sep 17 00:00:00 2001
From: Aaron Mueller
Date: Mon, 24 Mar 2014 21:55:07 +0100
Subject: [PATCH] File file handling

---
Review notes (not part of the commit message):
 * scrape-book-cover now fetches the image bytes before it opens the
   target file. with-open opens the stream before its body runs, so a
   failed download used to leave an empty cover file behind, and
   find-missing-covers (which only checks fs/exists?) would then skip
   that book on every later run.
 * Line structure and indentation were reconstructed from a
   whitespace-collapsed copy of this patch. Hunk line counts and the
   diffstat were re-checked, but leading whitespace is inferred; if a
   context line fails to match, apply with --ignore-whitespace.
 * The index lines still carry the original blob ids.

 project.clj                |  3 ++-
 src/buchdesmonats/core.clj | 33 +++++++++++++++++++++------------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/project.clj b/project.clj
index 29bebea..8d69ae7 100644
--- a/project.clj
+++ b/project.clj
@@ -7,4 +7,5 @@
                  [enlive "1.1.5"]
                  [me.raynes/fs "1.4.4"]
                  [clj-http "0.9.1"]]
-  :main buchdesmonats.core)
+  :main ^:skip-aot buchdesmonats.core
+  :profiles {:uberjar {:aot :all}})
diff --git a/src/buchdesmonats/core.clj b/src/buchdesmonats/core.clj
index ea662e5..59847e4 100644
--- a/src/buchdesmonats/core.clj
+++ b/src/buchdesmonats/core.clj
@@ -1,8 +1,13 @@
 (ns buchdesmonats.core
+  (:gen-class)
   (:require [net.cgrand.enlive-html :as html]
             [clj-http.client :as http-client]
             [clojure.string :as str]
-            [clojure.java.io :as io]))
+            [clojure.java.io :as io]
+            [me.raynes.fs :as fs]))
+
+(def config {:books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"
+             :target-dir (io/file "public" "book-covers")})
 
 (defn imgurl->bytes [lovelybooks-url]
   (-> (java.net.URL. lovelybooks-url)
@@ -14,10 +19,10 @@
       (http-client/get {:as :byte-array})
       :body))
 
-(defn url->filename [lovelybooks-url]
+(defn url->file [lovelybooks-url target-dir]
   (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
-    (str/lower-case (str author "_" title ".jpg"))))
-
+    (io/file target-dir
+             (str/lower-case (str author "_" title ".jpg")))))
 
 (defn scrape-book-urls [github-url]
   (->> (http-client/get github-url)
@@ -26,13 +31,17 @@
        (map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %)))
        (remove nil?)))
 
-(defn scrape-book-cover [url]
-  (with-open [out (io/output-stream (url->filename url))]
-    (.write out (imgurl->bytes url))))
-
+(defn scrape-book-cover [url target-dir]
+  (let [cover (imgurl->bytes url)] ; fetch first, so a failed download leaves no empty file behind
+    (with-open [out (io/output-stream (url->file url target-dir))]
+      (.write out cover))))
 
+(defn find-missing-covers [books-url target-dir]
+  (remove #(fs/exists? (url->file % target-dir))
+          (scrape-book-urls books-url)))
+
 (defn -main [& args]
-  (let [books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"]
-    (doall (pmap #(scrape-book-cover %)
-                 (scrape-book-urls books-url)))
-    true))
+  (fs/mkdirs (:target-dir config))
+  (doall (pmap #(scrape-book-cover % (:target-dir config))
+               (find-missing-covers (:books-url config) (:target-dir config))))
+  true)