65 lines
2.3 KiB
Clojure
65 lines
2.3 KiB
Clojure
(ns buchdesmonats.core
|
|
(:gen-class)
|
|
(:require [net.cgrand.enlive-html :as html]
|
|
[clj-http.client :as http-client]
|
|
[clojure.string :as str]
|
|
[clojure.java.io :as io]
|
|
[me.raynes.fs :as fs]))
|
|
|
|
;; Static application settings.
;;   :books-url  - raw Markdown page whose bullet list links to the book pages
;;   :target-dir - directory the downloaded cover images are written to
;; Declared ^:dynamic because an earmuffed name without it triggers Clojure's
;; "not declared dynamic" compiler warning and cannot be rebound with
;; `binding`. The root value is unchanged.
(def ^:dynamic *config*
  {:books-url  "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"
   :target-dir (io/file "public" "book-covers")})
|
|
|
|
(defn imgurl->bytes
  "Downloads the cover image referenced by a book page and returns it as a
  byte array.

  Parses the page at `lovelybooks-url`, takes the `src` attribute of the first
  element matching `div.bookcoverXXL > div > img` and requests that image
  with clj-http using `{:as :byte-array}`.

  A protocol-relative `src` (one starting with \"//\") is prefixed with
  \"http:\"; any other `src` is requested unchanged, so an already absolute
  URL is no longer corrupted by the prefix.

  Throws an ex-info carrying `{:url lovelybooks-url}` when no matching image
  with a non-blank `src` is found."
  [lovelybooks-url]
  (let [cover-img (-> (java.net.URL. lovelybooks-url)
                      html/html-resource
                      (html/select [:div.bookcoverXXL :> :div :> :img])
                      first)
        src       (get-in cover-img [:attrs :src])]
    ;; Fail with a message that names the page instead of issuing a request
    ;; to the bare string "http:".
    (when (str/blank? src)
      (throw (ex-info "No cover image found on book page"
                      {:url lovelybooks-url})))
    (let [image-url (if (.startsWith ^String src "//")
                      (str "http:" src)
                      src)]
      (:body (http-client/get image-url {:as :byte-array})))))
|
|
|
|
(defn url->file
  "Maps a book page URL to the cover image file inside `target-dir`.

  The file name is AUTHOR_TITLE.jpg in lower case, where AUTHOR and TITLE are
  the first two capture groups of the URL pattern below (the path segment
  after `/autor/` and the part of the next segment before the trailing
  `-DIGITS-X/`). When the URL does not match, both parts are nil and the
  resulting name is `_.jpg`."
  [lovelybooks-url target-dir]
  (let [url-pattern #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$"
        groups      (re-find url-pattern lovelybooks-url)
        author      (nth groups 1 nil)
        title       (nth groups 2 nil)
        file-name   (-> (str author "_" title ".jpg")
                        str/lower-case)]
    (io/file target-dir file-name)))
|
|
|
|
(defn scrape-book-urls
  "Fetches the document at `github-url` and returns a lazy seq of link targets.

  Only lines that start with \"* \" and contain a Markdown link of the form
  `[text](target)` contribute; the captured `target` is returned for each of
  them and all other lines are skipped."
  [github-url]
  (let [markdown    (:body (http-client/get github-url))
        link-target (fn [line]
                      (second (re-find #"^\* .*\[.+\]\((.+)\)" line)))]
    ;; `keep` drops the nil produced for lines without a matching link.
    (keep link-target (str/split-lines markdown))))
|
|
|
|
(defn scrape-book-cover
  "Downloads the cover for the book page `url` and writes it to the file that
  `url->file` derives from `url` and `target-dir`. Returns nil.

  The image is downloaded before the output file is opened: if the download
  throws, no empty file is left behind that a later existence check would
  mistake for an already downloaded cover."
  [url target-dir]
  (let [target-file (url->file url target-dir)
        cover-bytes (imgurl->bytes url)]
    (with-open [out (io/output-stream target-file)]
      (.write out cover-bytes))))
|
|
|
|
(defn find-missing-covers
  "Returns a lazy seq of the book URLs scraped from `books-url` whose cover
  file (as computed by `url->file` for `target-dir`) does not exist yet."
  [books-url target-dir]
  (let [cover-present? (fn [book-url]
                         (fs/exists? (url->file book-url target-dir)))]
    (->> (scrape-book-urls books-url)
         (remove cover-present?))))
|
|
|
|
;; Snippet for a single cover entry, taken from the div children of
;; div#covers in the layout file.
;;   link  - book page URL; set as the href of the entry's anchor
;;   title - value for the image's title attribute
;; The img src is built as an explicit "book-covers/NAME" string rather than
;; passing a java.io.File to set-attr, so the attribute always uses "/"
;; regardless of the platform's path separator.
(html/defsnippet cover-item-model "buchdesmonats/layout.html" [:div#covers :> :div]
  [link title]
  [:a]   (html/set-attr :href link)
  [:img] (html/set-attr :src (str "book-covers/"
                                  (.getName ^java.io.File (url->file link "book-covers")))
                        :title title))
|
|
|
|
;; Page template based on the layout file: the content of the #covers element
;; is replaced by one cover-item-model snippet per entry of cover-urls, each
;; with the fixed title "To LovelyBooks".
(html/deftemplate index-template "buchdesmonats/layout.html"
  [cover-urls]
  [:#covers] (html/content
               (for [cover-url cover-urls]
                 (cover-item-model cover-url "To LovelyBooks"))))
|
|
|
|
(defn generate-html
  "Renders the index page for `book-urls` with `index-template` and writes it
  to `index.html` inside `target-dir`. Returns nil."
  [book-urls target-dir]
  (let [page (apply str (index-template book-urls))]
    ;; Write below target-dir instead of a hard-coded "public" directory, so
    ;; the parameter is honoured.
    (spit (io/file target-dir "index.html") page)))
|
|
|
|
(defn -main
  "Entry point: creates the cover directory, regenerates public/index.html
  from the scraped book list and downloads every cover whose file does not
  exist yet. Returns true.

  The book list is fetched once and reused for both the page and the
  missing-cover check, instead of requesting the same document twice."
  [& args]
  (let [{:keys [books-url target-dir]} *config*]
    (fs/mkdirs target-dir)
    (let [book-urls (scrape-book-urls books-url)
          missing   (remove #(fs/exists? (url->file % target-dir)) book-urls)]
      ;; TODO: take the HTML output directory from the config instead of
      ;; hard-coding it here.
      (generate-html book-urls "public")
      ;; Downloads run in parallel; dorun forces them all and discards the
      ;; nil results.
      (dorun (pmap #(scrape-book-cover % target-dir) missing))
      true)))
|