File file handling
This commit is contained in:
parent
003401b5f1
commit
e1d8943c96
2 changed files with 23 additions and 13 deletions
|
@ -7,4 +7,5 @@
|
||||||
[enlive "1.1.5"]
|
[enlive "1.1.5"]
|
||||||
[me.raynes/fs "1.4.4"]
|
[me.raynes/fs "1.4.4"]
|
||||||
[clj-http "0.9.1"]]
|
[clj-http "0.9.1"]]
|
||||||
:main buchdesmonats.core)
|
:main ^:skip-aot buchdesmonats.core
|
||||||
|
:profiles {:uberjar {:aot :all}})
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
(ns buchdesmonats.core
|
(ns buchdesmonats.core
|
||||||
|
(:gen-class)
|
||||||
(:require [net.cgrand.enlive-html :as html]
|
(:require [net.cgrand.enlive-html :as html]
|
||||||
[clj-http.client :as http-client]
|
[clj-http.client :as http-client]
|
||||||
[clojure.string :as str]
|
[clojure.string :as str]
|
||||||
[clojure.java.io :as io]))
|
[clojure.java.io :as io]
|
||||||
|
[me.raynes.fs :as fs]))
|
||||||
|
|
||||||
|
(def config {:books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"
|
||||||
|
:target-dir (io/file "public" "book-covers")})
|
||||||
|
|
||||||
(defn imgurl->bytes [lovelybooks-url]
|
(defn imgurl->bytes [lovelybooks-url]
|
||||||
(-> (java.net.URL. lovelybooks-url)
|
(-> (java.net.URL. lovelybooks-url)
|
||||||
|
@ -14,10 +19,10 @@
|
||||||
(http-client/get {:as :byte-array})
|
(http-client/get {:as :byte-array})
|
||||||
:body))
|
:body))
|
||||||
|
|
||||||
(defn url->filename [lovelybooks-url]
|
(defn url->file [lovelybooks-url target-dir]
|
||||||
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
|
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
|
||||||
(str/lower-case (str author "_" title ".jpg"))))
|
(io/file target-dir
|
||||||
|
(str/lower-case (str author "_" title ".jpg")))))
|
||||||
|
|
||||||
(defn scrape-book-urls [github-url]
|
(defn scrape-book-urls [github-url]
|
||||||
(->> (http-client/get github-url)
|
(->> (http-client/get github-url)
|
||||||
|
@ -26,13 +31,17 @@
|
||||||
(map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %)))
|
(map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %)))
|
||||||
(remove nil?)))
|
(remove nil?)))
|
||||||
|
|
||||||
(defn scrape-book-cover [url]
|
(defn scrape-book-cover [url target-dir]
|
||||||
(with-open [out (io/output-stream (url->filename url))]
|
(let [target-file (url->file url target-dir)]
|
||||||
(.write out (imgurl->bytes url))))
|
(with-open [out (io/output-stream target-file)]
|
||||||
|
(.write out (imgurl->bytes url)))))
|
||||||
|
|
||||||
|
(defn find-missing-covers [books-url target-dir]
|
||||||
|
(remove #(fs/exists? (url->file % target-dir))
|
||||||
|
(scrape-book-urls books-url)))
|
||||||
|
|
||||||
(defn -main [& args]
|
(defn -main [& args]
|
||||||
(let [books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"]
|
(fs/mkdirs (:target-dir config))
|
||||||
(doall (pmap #(scrape-book-cover %)
|
(doall (pmap #(scrape-book-cover % (:target-dir config))
|
||||||
(scrape-book-urls books-url)))
|
(find-missing-covers (:books-url config) (:target-dir config))))
|
||||||
true))
|
true)
|
||||||
|
|
Loading…
Reference in a new issue