;;; Copyright (C) 2014-2015 Aaron Fischer
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a copy of
;;; this software and associated documentation files (the "Software"), to deal in
;;; the Software without restriction, including without limitation the rights to
;;; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
;;; the Software, and to permit persons to whom the Software is furnished to do so,
;;; subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in all
;;; copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
;;; FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
;;; COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
;;; IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
;;; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

(ns buchdesmonats.core
  (:gen-class)
  (:require [net.cgrand.enlive-html :as html]
            [clj-http.client :as http-client]
            [clojure.string :as str]
            [clojure.java.io :as io]
            [me.raynes.fs :as fs]))

(defn- cover-src->url
  "Turns the `src` attribute of a cover image into a fetchable URL.
  Protocol-relative values (starting with //) get an http: scheme prepended;
  any other value is returned unchanged so an already absolute URL is not
  mangled."
  [^String src]
  (if (.startsWith src "//")
    (str "http:" src)
    src))

(defn imgurl->bytes
  "Loads the page at `lovelybooks-url`, picks the first image matching
  div.bookcoverXXL > div > img and returns the image data as a byte array.
  Throws an ex-info (with the page URL in its data map) when the page has no
  such image or the image carries no src attribute."
  [lovelybooks-url]
  (let [src (-> (java.net.URL. lovelybooks-url)
                html/html-resource
                (html/select [:div.bookcoverXXL :> :div :> :img])
                first
                (get-in [:attrs :src]))]
    ;; Fail with a meaningful message instead of requesting the bare
    ;; string "http:" when no cover could be located.
    (when-not src
      (throw (ex-info "No cover image found on page" {:url lovelybooks-url})))
    (:body (http-client/get (cover-src->url src) {:as :byte-array}))))

(defn encode-url-part
  "URL-encodes `part` using UTF-8 (java.net.URLEncoder)."
  [part]
  (java.net.URLEncoder/encode part "UTF-8"))

(defn encode-url
  "Rebuilds a lovelybooks.de book URL from the author and title path segments
  found after /autor/ in `url`, URL-encoding both segments.
  Throws an ex-info (with the offending URL in its data map) when `url` does
  not contain an /autor/<author>/<title> path."
  [url]
  (if-let [[_ author book-title] (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url)]
    (str "http://lovelybooks.de/autor/"
         (encode-url-part author)
         "/"
         (encode-url-part book-title))
    (throw (ex-info "Not a lovelybooks book URL" {:url url}))))

(defn url->file
  "Maps a lovelybooks URL to the java.io.File of its cover inside `target-dir`.
  The file name is <author>_<title>.jpg, lower-cased, with every character
  outside a-z, 0-9, '-', '_' and '.' removed.
  A URL that does not match the expected pattern yields the file name _.jpg."
  [lovelybooks-url target-dir]
  (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
    (-> (str author "_" title ".jpg")
        str/lower-case
        (str/replace #"[^a-z0-9-_.]" "")
        (#(io/file target-dir %)))))

(defn scrape-book-urls
  "Fetches `github-url` and returns a lazy seq of the link targets found on
  lines that look like a Markdown list item containing a [text](target) link."
  [github-url]
  (->> (http-client/get github-url)
       :body
       str/split-lines
       (map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %)))
       (remove nil?)))

(defn scrape-book-cover
  "Downloads the cover for the book at `url` and writes it to the file
  computed by `url->file` inside `target-dir`."
  [url target-dir]
  (let [target-file (url->file url target-dir)
        ;; Download BEFORE opening the file: a failed download must not leave
        ;; an empty file behind, because find-missing-covers would then
        ;; consider the cover present and never retry it.
        cover-bytes (imgurl->bytes (encode-url url))]
    (with-open [out (io/output-stream target-file)]
      (.write out cover-bytes))))

(defn find-missing-covers
  "Returns the book URLs listed at `books-url` whose cover file does not yet
  exist in `target-dir`."
  [books-url target-dir]
  (remove #(fs/exists? (url->file % target-dir))
          (scrape-book-urls books-url)))

;; Snippet built from the nodes matching div#covers > div in layout.html:
;; sets href on the <a> elements to `link`, and src/title on the <img>
;; elements to the cover file path (relative to "book-covers") and `title`.
(html/defsnippet cover-item-model "buchdesmonats/layout.html" [:div#covers :> :div]
  [link title]
  [:a] (html/set-attr :href link)
  [:img] (html/set-attr :src (url->file link "book-covers")
                        :title title))

;; Page template: fills the #covers element with one cover-item-model per URL.
(html/deftemplate index-template "buchdesmonats/layout.html"
  [cover-urls]
  [:#covers] (html/content (map #(cover-item-model % "zu Lovely Books") cover-urls)))

(defn generate-html
  "Renders the index page for `book-urls` and writes it to index.html inside
  `target-dir`."
  [book-urls target-dir]
  (let [content (apply str (index-template book-urls))]
    ;; Honour target-dir instead of a hard-coded output directory.
    (spit (io/file target-dir "index.html") content)))

(defn -main
  "Entry point: creates public/book-covers, generates public/index.html from
  the book list, downloads all covers that are not on disk yet (in parallel)
  and exits with status 0."
  [& args]
  (let [github-url "https://git.okoyono.de/mezzomix/buch_des_monats/raw/master/README.mkd"
        target-dir (io/file "public" "book-covers")]
    (fs/mkdirs target-dir)
    (generate-html (scrape-book-urls github-url) "public")
    ;; dorun forces every download without retaining the results.
    (dorun (pmap #(scrape-book-cover % target-dir)
                 (find-missing-covers github-url target-dir)))
    ;; Explicit exit so the process ends once the downloads are done.
    (System/exit 0)))