File file handling

This commit is contained in:
Aaron Mueller 2014-03-24 21:55:07 +01:00
parent 003401b5f1
commit e1d8943c96
2 changed files with 23 additions and 13 deletions

View file

@ -7,4 +7,5 @@
[enlive "1.1.5"] [enlive "1.1.5"]
[me.raynes/fs "1.4.4"] [me.raynes/fs "1.4.4"]
[clj-http "0.9.1"]] [clj-http "0.9.1"]]
:main buchdesmonats.core) :main ^:skip-aot buchdesmonats.core
:profiles {:uberjar {:aot :all}})

View file

@ -1,8 +1,13 @@
(ns buchdesmonats.core (ns buchdesmonats.core
(:gen-class)
(:require [net.cgrand.enlive-html :as html] (:require [net.cgrand.enlive-html :as html]
[clj-http.client :as http-client] [clj-http.client :as http-client]
[clojure.string :as str] [clojure.string :as str]
[clojure.java.io :as io])) [clojure.java.io :as io]
[me.raynes.fs :as fs]))
;; Static settings used by the entry point (-main):
;;   :books-url  - URL that is fetched to obtain the list of book links
;;                 (presumably a Markdown page, judging by the .mkd suffix).
;;   :target-dir - java.io.File for the "public/book-covers" directory; -main
;;                 creates it and passes it on as the place to store covers.
(def config {:books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"
:target-dir (io/file "public" "book-covers")})
(defn imgurl->bytes [lovelybooks-url] (defn imgurl->bytes [lovelybooks-url]
(-> (java.net.URL. lovelybooks-url) (-> (java.net.URL. lovelybooks-url)
@ -14,10 +19,10 @@
(http-client/get {:as :byte-array}) (http-client/get {:as :byte-array})
:body)) :body))
(defn url->filename [lovelybooks-url] (defn url->file [lovelybooks-url target-dir]
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)] (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
(str/lower-case (str author "_" title ".jpg")))) (io/file target-dir
(str/lower-case (str author "_" title ".jpg")))))
(defn scrape-book-urls [github-url] (defn scrape-book-urls [github-url]
(->> (http-client/get github-url) (->> (http-client/get github-url)
@ -26,13 +31,17 @@
(map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %))) (map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %)))
(remove nil?))) (remove nil?)))
(defn scrape-book-cover [url] (defn scrape-book-cover [url target-dir]
(with-open [out (io/output-stream (url->filename url))] (let [target-file (url->file url target-dir)]
(.write out (imgurl->bytes url)))) (with-open [out (io/output-stream target-file)]
(.write out (imgurl->bytes url)))))
(defn find-missing-covers
  "Scrapes the book URLs listed at `books-url` and returns a lazy sequence of
  those whose cover file (as computed by `url->file` for `target-dir`) does
  not exist yet."
  [books-url target-dir]
  (let [cover-present? (fn [book-url]
                         (fs/exists? (url->file book-url target-dir)))]
    (->> (scrape-book-urls books-url)
         (remove cover-present?))))
(defn -main [& args] (defn -main [& args]
(let [books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"] (fs/mkdirs (:target-dir config))
(doall (pmap #(scrape-book-cover %) (doall (pmap #(scrape-book-cover % (:target-dir config))
(scrape-book-urls books-url))) (find-missing-covers (:books-url config) (:target-dir config))))
true)) true)