buch_des_monats/src/buchdesmonats/core.clj

94 lines
3.9 KiB
Clojure
Raw Normal View History

;;; Copyright (C) 2014-2021 Aaron Fischer <mail@aaron-fischer.net>
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a copy
;;; of this software and associated documentation files (the "Software"), to
;;; deal in the Software without restriction, including without limitation the
;;; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
;;; sell copies of the Software, and to permit persons to whom the Software is
;;; furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
;;; IN THE SOFTWARE.
2014-03-21 00:42:43 +01:00
(ns buchdesmonats.core
2014-03-24 21:55:07 +01:00
(:gen-class)
2014-03-21 00:42:43 +01:00
(:require [net.cgrand.enlive-html :as html]
[clj-http.client :as http-client]
[clojure.string :as str]
2014-03-24 21:55:07 +01:00
[clojure.java.io :as io]
2015-06-10 23:16:32 +02:00
[clojure.tools.logging :as log]
2021-03-10 00:52:59 +01:00
[me.raynes.fs :as fs]))
2014-03-24 21:55:07 +01:00
2021-03-10 00:52:59 +01:00
(defn bookurl->isbn
  "Returns the first run of decimal digits found in `url`, or nil when the
  URL contains no digit at all. The rest of this file treats that digit run
  as the ISBN of the book."
  [url]
  (let [digit-run #"[0-9]+"]
    (re-find digit-run url)))
(defn isbn->imageurl
  "Builds the cover image URL for `isbn` (a string).

  The first nine characters are split into three groups of three, each used
  as one path segment; whatever follows the ninth character becomes the file
  name, with a .jpg extension appended. `subs` throws when `isbn` has fewer
  than nine characters."
  [isbn]
  (let [segments [(subs isbn 0 3)
                  (subs isbn 3 6)
                  (subs isbn 6 9)
                  (subs isbn 9)]]
    (apply str
           "https://medien.ubitweb.de/bildzentrale_original/"
           (concat (interpose "/" segments) [".jpg"]))))
(defn imgurl->bytes
  "Downloads the cover image belonging to the book at `url`.

  The ISBN is extracted from `url`, turned into an image URL, and fetched
  with clj-http using `:as :byte-array`. Returns the `:body` of the
  response."
  [url]
  (-> url
      bookurl->isbn
      isbn->imageurl
      (http-client/get {:as :byte-array})
      :body))
(defn url->file
  "Returns the java.io.File inside `target-dir` under which the cover for the
  book at `url` is stored. The file name is the ISBN extracted from `url`
  followed by .jpg."
  [url target-dir]
  (let [filename (str (bookurl->isbn url) ".jpg")]
    (io/file target-dir filename)))
2014-03-21 00:42:43 +01:00
2015-06-10 23:15:53 +02:00
(defn scrape-book-urls
  "Fetches the document at `datasource-url` and returns a lazy seq of the
  link targets found in it.

  Only lines that start with an asterisk and a space and contain a
  Markdown-style [text](target) link contribute; every other line is
  dropped."
  [datasource-url]
  ;; NOTE(review): :insecure? true is passed to clj-http here, which per its
  ;; documentation disables TLS certificate checks -- confirm still required.
  (let [response    (http-client/get datasource-url {:insecure? true})
        link-target (fn [line]
                      (second (re-find #"^\* .*\[.+\]\((.+)\)" line)))]
    ;; keep drops the nil results produced by lines without a link.
    (keep link-target (str/split-lines (:body response)))))
2014-03-24 21:55:07 +01:00
(defn scrape-book-cover
  "Downloads the cover for the book at `url` and writes it into
  `target-dir`.

  Any Exception raised while resolving, downloading or writing is logged at
  info level and swallowed, so one failing book does not abort the caller."
  [url target-dir]
  (try
    ;; The image is fetched before the output stream is opened, so a failed
    ;; download does not leave an empty file behind.
    (let [destination (url->file url target-dir)
          image-data  (imgurl->bytes url)]
      (with-open [sink (io/output-stream destination)]
        (.write sink image-data)))
    (catch Exception e
      (log/info "Problem with " url ":" e ". Skip it."))))
2014-03-21 00:42:43 +01:00
2014-03-24 21:55:07 +01:00
(defn find-missing-covers
  "Returns the book URLs listed at `books-url` whose cover file does not yet
  exist in `target-dir`."
  [books-url target-dir]
  (let [downloaded? (fn [book-url]
                      (fs/exists? (url->file book-url target-dir)))]
    (remove downloaded? (scrape-book-urls books-url))))
2014-03-25 00:05:46 +01:00
2021-03-10 00:52:59 +01:00
(defn cover-item-model-for-type
  "Builds an Enlive snippet for a single cover entry of the given `type`.

  The snippet source is the file <type>.html inside `public-dir`; the nodes
  selected are the div children of div#covers. The returned function takes
  `link` and `title`, sets `link` as the href of every a element, and sets
  the src (the local cover file for `link`) and the title of every img
  element.

  The anonymous `html/snippet` is used instead of `html/defsnippet`:
  defsnippet expands to a `def`, which inside a function body would redefine
  a namespace-global var on every call."
  [public-dir type]
  (html/snippet (io/file public-dir (str type ".html")) [:div#covers :> :div]
                [link title]
                [:a]   (html/set-attr :href link)
                [:img] (html/set-attr :src (url->file link (str type "-covers"))
                                      :title title)))
2014-03-25 00:05:46 +01:00
2021-03-10 00:52:59 +01:00
(defn template-for-type
  "Builds an Enlive template from the file <type>.html inside `public-dir`.

  The returned function takes a collection of book URLs and replaces the
  content of the #covers element with one rendered cover item per URL.

  The anonymous `html/template` is used instead of `html/deftemplate`, which
  expands to a `def` and would redefine a namespace-global var on every
  call. The cover-item snippet is built once up front rather than once per
  URL, so the HTML file is not parsed again for every book."
  [public-dir type]
  (let [cover-item (cover-item-model-for-type public-dir type)]
    (html/template (io/reader (io/file public-dir (str type ".html")))
                   [cover-urls]
                   [:#covers] (html/content
                               (map #(cover-item % "zu Lovely Books")
                                    cover-urls)))))
2014-03-25 00:05:46 +01:00
2021-03-10 00:52:59 +01:00
(defn generate-html
  "Renders the page for `type` with the given `book-urls` and writes it to
  <type>.html inside `public-dir`.

  The same file serves as the template source, so it is overwritten with its
  own rendered output. The markup is fully built as a string before the file
  is opened for writing."
  [type book-urls public-dir]
  (let [render   (template-for-type public-dir type)
        markup   (apply str (render book-urls))
        out-file (io/file public-dir (str type ".html"))]
    (spit out-file markup)))
2014-03-21 00:42:43 +01:00
(defn -main
  "Command line entry point. Expects the cover type as first argument.

  Without arguments a fatal message is logged and the process exits with
  status 1. Otherwise the cover directory below public is created, the HTML
  page for the type is regenerated, all covers not yet on disk are
  downloaded in parallel, and the process exits with status 0."
  [& args]
  (when (empty? args)
    (log/fatal "Please give a cover type (comic/book)")
    (System/exit 1))
  (let [type           (first args)
        datasource-url (str "https://git.okoyono.de/mezzo/buch_des_monats/raw/master/"
                            (clojure.string/upper-case type)
                            ".mkd")
        target-dir     (io/file "public" (str type "-covers/"))]
    (fs/mkdirs target-dir)
    (generate-html type (scrape-book-urls datasource-url) "public")
    ;; pmap is lazy; doall forces every download before the process exits.
    (->> (find-missing-covers datasource-url target-dir)
         (pmap #(scrape-book-cover % target-dir))
         doall)
    (System/exit 0)))