;;; Copyright (C) 2014-2020 Aaron Fischer
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a copy
;;; of this software and associated documentation files (the "Software"), to
;;; deal in the Software without restriction, including without limitation the
;;; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
;;; sell copies of the Software, and to permit persons to whom the Software is
;;; furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
;;; IN THE SOFTWARE.

(ns buchdesmonats.core
  (:gen-class)
  (:require [net.cgrand.enlive-html :as html]
            [clj-http.client :as http-client]
            [clojure.string :as str]
            [clojure.java.io :as io]
            [clojure.tools.logging :as log]
            [me.raynes.fs :as fs]
            [buchdesmonats.hash :as hash]
            [buchdesmonats.sources.mojoreads :as mojoreads]
            [buchdesmonats.sources.lovelybooks :as lovelybooks]))

(defn bookurl->imageurl
  "Resolves the cover image URL for the book page at `bookurl` by delegating
  to the source-specific `find-cover-image` function selected by the URL's
  host. There is deliberately no default branch: an unsupported host makes
  `case` throw an IllegalArgumentException, and an unparsable URL makes the
  java.net.URL constructor throw."
  [bookurl]
  (let [host (.getHost (java.net.URL. bookurl))]
    (case host
      ("www.lovelybooks.de" "lovelybooks.de") (lovelybooks/find-cover-image bookurl)
      ("mojoreads.com" "mojoreads.de")        (mojoreads/find-cover-image bookurl))))

(defn imgurl->bytes
  "Takes a book page `url`, resolves its cover image URL and downloads that
  image. Returns the response body as a byte array."
  [url]
  (let [image-url (bookurl->imageurl url)
        response  (http-client/get image-url {:as :byte-array})]
    (:body response)))

(defn url->file
  "Returns the java.io.File inside `target-dir` under which the cover for
  `url` is stored. The file name is `(hash/md5 url)` followed by `.jpg`."
  [url target-dir]
  (io/file target-dir (str (hash/md5 url) ".jpg")))

(defn scrape-book-urls
  "Fetches the document at `datasource-url`, splits its body into lines and
  returns a lazy sequence of the link targets of all lines that look like a
  Markdown bullet containing a link (an asterisk, a space, then `[text](url)`).
  Lines that do not match are dropped."
  [datasource-url]
  ;; NOTE(review): `:insecure? true` asks clj-http to skip TLS certificate
  ;; verification for this request -- confirm this is still required.
  (let [body (:body (http-client/get datasource-url {:insecure? true}))]
    (->> (str/split-lines body)
         (map #(second (re-find #"^\* .*\[.+\]\((.+)\)" %)))
         (remove nil?))))

(defn scrape-book-cover
  "Downloads the cover image for the book page `url` and writes it to the
  file returned by `(url->file url target-dir)`. This is best effort: any
  Exception is logged at info level together with its message and the book
  is skipped."
  [url target-dir]
  (try
    (let [target-file (url->file url target-dir)
          ;; Fetch before opening the output stream so that a failed download
          ;; does not leave an empty file behind.
          bytes       (imgurl->bytes url)]
      (with-open [out (io/output-stream target-file)]
        (.write out bytes)))
    (catch Exception e
      ;; A Throwable is not associative, so a map-style lookup on it always
      ;; yields nil; ask the exception for its message directly instead.
      (log/info "Problem with " url ":" (.getMessage e) ". Skip it."))))

(defn find-missing-covers
  "Returns the book URLs listed at `books-url` for which no cover file exists
  in `target-dir` yet."
  [books-url target-dir]
  (remove #(fs/exists? (url->file % target-dir))
          (scrape-book-urls books-url)))

(defn cover-item-model-for-type
  "Defines (via `html/defsnippet`) and returns the snippet `cover-item-model`
  for the list `type`. The snippet is taken from the direct `div` children of
  `div#covers` in `buchdesmonats/<type>.html` and accepts `[link title]`: the
  anchor's href is set to `link`, the image's src to the cover file of `link`
  below `<type>-covers`, and the image's title to `title`."
  [type]
  (html/defsnippet cover-item-model (str "buchdesmonats/" type ".html")
    [:div#covers :> :div]
    [link title]
    [:a]   (html/set-attr :href link)
    [:img] (html/set-attr :src (url->file link (str type "-covers"))
                          :title title)))

(defn template-for-type
  "Defines (via `html/deftemplate`) and returns the page template
  `book-template` for the list `type`. The template accepts a collection of
  book URLs and fills `#covers` with one cover item per URL."
  [type]
  ;; Build the item snippet once per template instead of once per cover; the
  ;; snippet only depends on `type`, so the rendered output is identical.
  (let [cover-item (cover-item-model-for-type type)]
    (html/deftemplate book-template (str "buchdesmonats/" type ".html")
      [cover-urls]
      [:#covers] (html/content
                  (map #(cover-item % "zu Lovely Books") cover-urls)))))

(defn generate-html
  "Renders the page for `type` with the given `book-urls` and writes it to
  `<target-dir>/<type>.html`."
  [type book-urls target-dir]
  (let [render  (template-for-type type)
        content (apply str (render book-urls))]
    (with-open [out (io/writer (io/file target-dir (str type ".html")))]
      (.write out content))))

(defn -main
  "Entry point. Expects the list type as the first command line argument.
  Generates `public/<type>.html` from the book list at the remote datasource
  and downloads all covers that are not yet present in `public/<type>-covers`.
  Exits with status 1 when the type argument is missing, otherwise with 0."
  [& args]
  (let [type (first args)]
    ;; Without a type the datasource URL cannot be built (upper-casing nil
    ;; throws), so fail early with a readable message.
    (when (str/blank? type)
      (binding [*out* *err*]
        (println "Missing argument: the list type must be given as first argument."))
      (System/exit 1))
    (let [datasource-url (str "https://git.okoyono.de/mezzo/buch_des_monats/raw/master/"
                              (str/upper-case type)
                              ".mkd")
          target-dir     (io/file "public" (str type "-covers"))]
      (fs/mkdirs target-dir)
      (generate-html type (scrape-book-urls datasource-url) "public")
      ;; doall forces the lazy pmap so every download has finished before exit.
      (doall (pmap #(scrape-book-cover % target-dir)
                   (find-missing-covers datasource-url target-dir)))
      ;; Explicit exit, as in the original program flow.
      (System/exit 0))))