2019-01-24 23:14:05 +01:00
|
|
|
;;; Copyright (C) 2014-2019 Aaron Fischer <mail@aaron-fischer.net>
|
2014-03-25 23:55:29 +01:00
|
|
|
;;;
|
|
|
|
;;; Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
|
|
;;; this software and associated documentation files (the "Software"), to deal in
|
|
|
|
;;; the Software without restriction, including without limitation the rights to
|
|
|
|
;;; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
|
|
;;; the Software, and to permit persons to whom the Software is furnished to do so,
|
|
|
|
;;; subject to the following conditions:
|
|
|
|
;;;
|
|
|
|
;;; The above copyright notice and this permission notice shall be included in all
|
|
|
|
;;; copies or substantial portions of the Software.
|
|
|
|
;;;
|
|
|
|
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
|
|
;;; FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
|
|
;;; COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
|
|
;;; IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
;;; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
2014-03-21 00:42:43 +01:00
|
|
|
(ns buchdesmonats.core
|
2014-03-24 21:55:07 +01:00
|
|
|
(:gen-class)
|
2014-03-21 00:42:43 +01:00
|
|
|
(:require [net.cgrand.enlive-html :as html]
|
|
|
|
[clj-http.client :as http-client]
|
|
|
|
[clojure.string :as str]
|
2014-03-24 21:55:07 +01:00
|
|
|
[clojure.java.io :as io]
|
2015-06-10 23:16:32 +02:00
|
|
|
[clojure.tools.logging :as log]
|
2014-03-24 21:55:07 +01:00
|
|
|
[me.raynes.fs :as fs]))
|
|
|
|
|
2014-03-21 00:42:43 +01:00
|
|
|
(defn imgurl->bytes
  "Downloads the cover image referenced on a book page.

  Parses the HTML at `lovelybooks-url`, reads the `srcset` attribute of the
  first `img.ResponsiveImage.BookCover` element, splits it on spaces and
  fetches the last entry that looks like an http(s) URL ending in `.jpg`
  (presumably the largest variant -- confirm against the page markup).

  Returns the image as a byte array.

  Throws `ExceptionInfo` with the page URL under `:url` when the page has no
  matching image, the image has no `srcset`, or the `srcset` lists no `.jpg`
  URL. Errors from parsing the page or from the HTTP request propagate
  unchanged."
  [lovelybooks-url]
  (let [srcset (-> (java.net.URL. lovelybooks-url)
                   html/html-resource
                   (html/select [:img.ResponsiveImage.BookCover])
                   first
                   (get-in [:attrs :srcset]))
        ;; A missing image or attribute yields nil here; splitting nil would
        ;; fail with a NullPointerException that says nothing about the page.
        candidates (when srcset (str/split srcset #" "))
        url-to-fetch (last (filter #(re-matches #"http.+\.jpg" %) candidates))]
    (when-not url-to-fetch
      (throw (ex-info "No JPEG cover image found on book page"
                      {:url lovelybooks-url})))
    (:body (http-client/get url-to-fetch {:as :byte-array}))))
|
|
|
|
|
2014-03-21 00:42:43 +01:00
|
|
|
|
2014-06-10 23:09:36 +02:00
|
|
|
(defn encode-url-part
  "Returns `part` encoded by `java.net.URLEncoder` with the UTF-8 charset."
  [part]
  (-> part
      (java.net.URLEncoder/encode "UTF-8")))
|
|
|
|
|
|
|
|
(defn encode-url
  "Rebuilds a book URL from `url` with URL-encoded path segments.

  Looks for `/autor/<author>/<title>` (optionally followed by a slash) in
  `url`, encodes the match and both captured segments with
  `encode-url-part`, and returns
  `https://lovelybooks.de/autor/<author>/<title>` built from the two encoded
  segments.

  When `url` does not contain that pattern, `nth` throws an
  `IndexOutOfBoundsException`."
  [url]
  (let [encoded (->> url
                     (re-find #"\/autor\/([^/]+)\/([^/]+)\/?")
                     (map encode-url-part))]
    ;; Index 0 is the whole match; 1 and 2 are the author and title groups.
    (str "https://lovelybooks.de/autor/" (nth encoded 1) "/" (nth encoded 2))))
|
2014-06-10 23:09:36 +02:00
|
|
|
|
2014-03-24 21:55:07 +01:00
|
|
|
(defn url->file
  "Maps a book URL to the file its cover image is stored in.

  Expects `lovelybooks-url` to contain
  `/autor/<author>/<title>-<digits>-<one char>` at its end, with or without a
  trailing slash. The file name is `<author>_<title>.jpg`, lower-cased, with
  every character other than `a-z`, `0-9`, `-`, `_` and `.` removed.

  Returns a `java.io.File` for that name inside `target-dir`. A URL that does
  not match the pattern yields the file `_.jpg`."
  [lovelybooks-url target-dir]
  (let [;; The trailing slash is optional, matching what `encode-url` accepts;
        ;; otherwise every slash-less URL would collapse onto `_.jpg`.
        [_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/?$"
                                  lovelybooks-url)
        file-name (-> (str author "_" title ".jpg")
                      str/lower-case
                      (str/replace #"[^a-z0-9-_.]" ""))]
    (io/file target-dir file-name)))
|
2014-03-21 00:42:43 +01:00
|
|
|
|
2015-06-10 23:15:53 +02:00
|
|
|
(defn scrape-book-urls
  "Extracts the link targets from the list document at `datasource-url`.

  Fetches the document over HTTP (with the client's `:insecure?` option
  enabled), splits the body into lines, and for every line that starts with
  `* ` and contains a `[text](target)` link collects the captured `target`.
  Lines without such a link are skipped.

  Returns a lazy sequence of the captured strings in document order."
  [datasource-url]
  (let [response (http-client/get datasource-url {:insecure? true})
        lines (str/split-lines (:body response))]
    (keep (fn [line]
            (second (re-find #"^\* .*\[.+\]\((.+)\)" line)))
          lines)))
|
|
|
|
|
2014-03-24 21:55:07 +01:00
|
|
|
(defn scrape-book-cover
  "Downloads the cover for the book at `url` into `target-dir`.

  The target file comes from `url->file`, the page is requested through the
  URL produced by `encode-url`, and the image bytes from `imgurl->bytes` are
  written to the file. The image is fetched before the file is opened, so a
  failed fetch does not create a file.

  Best effort: any `Exception` is logged at info level, together with the
  exception itself, and the book is skipped. Returns nil."
  [url target-dir]
  (try
    (let [target-file (url->file url target-dir)
          encoded-url (encode-url url)
          cover-bytes (imgurl->bytes encoded-url)]
      (with-open [out (io/output-stream target-file)]
        (.write out cover-bytes)))
    (catch Exception e
      ;; A Throwable as first argument is logged as the cause, so the log
      ;; shows why the cover was skipped.
      (log/info e "Problems with " url ", skip it."))))
|
2014-03-21 00:42:43 +01:00
|
|
|
|
2014-03-24 21:55:07 +01:00
|
|
|
(defn find-missing-covers
  "Returns the book URLs listed at `books-url` whose cover file is absent.

  The URLs come from `scrape-book-urls`; a URL is dropped when the file
  `url->file` maps it to inside `target-dir` already exists. The result is a
  lazy sequence."
  [books-url target-dir]
  (let [cover-present? (fn [book-url]
                         (fs/exists? (url->file book-url target-dir)))]
    (->> (scrape-book-urls books-url)
         (remove cover-present?))))
|
2014-03-25 00:05:46 +01:00
|
|
|
|
|
|
|
;; Snippet for a single cover entry, taken from the direct `div` children of
;; `div#covers` in buchdesmonats/layout.html.
;;   link  - book URL; becomes the anchor's href and determines the image
;;           path via `url->file` relative to the "book-covers" directory
;;   title - text placed in the image's title attribute
(html/defsnippet cover-item-model "buchdesmonats/layout.html" [:div#covers :> :div]
  [link title]

  [:a]
  (html/set-attr :href link)

  [:img]
  (html/set-attr :src (url->file link "book-covers")
                 :title title))
|
|
|
|
|
|
|
|
;; Page template built from buchdesmonats/layout.html. The content of the
;; `#covers` element is replaced by one `cover-item-model` snippet per entry
;; of `cover-urls`, each with the fixed title "zu Lovely Books".
(html/deftemplate index-template "buchdesmonats/layout.html"
  [cover-urls]

  [:#covers]
  (html/content
   (map (fn [cover-url]
          (cover-item-model cover-url "zu Lovely Books"))
        cover-urls)))
|
2014-03-25 00:05:46 +01:00
|
|
|
|
|
|
|
(defn generate-html
  "Renders the index page for `book-urls` and writes it to
  `<target-dir>/index.html`.

  The page is produced by `index-template`; its output pieces are
  concatenated into a single string before being written. `target-dir` must
  already exist. Returns nil."
  [book-urls target-dir]
  (let [content (apply str (index-template book-urls))]
    ;; Write into the directory the caller asked for rather than a
    ;; hard-coded "public".
    (with-open [out (io/writer (io/file target-dir "index.html"))]
      (.write out content))))
|
2018-07-13 23:31:00 +02:00
|
|
|
|
2014-03-21 00:42:43 +01:00
|
|
|
(defn -main
  "Program entry point; `args` are ignored.

  Creates `public/book-covers`, generates the index page from the book list,
  downloads every cover that is not yet present (in parallel via `pmap`),
  and then exits the JVM with status 0."
  [& args]
  (let [datasource-url "https://git.okoyono.de/mezzomix/buch_des_monats/raw/master/README.mkd"
        target-dir (io/file "public" "book-covers")]
    (fs/mkdirs target-dir)
    (generate-html (scrape-book-urls datasource-url) "public")
    ;; pmap is lazy; doall forces every download to run before exiting.
    (->> (find-missing-covers datasource-url target-dir)
         (pmap (fn [book-url]
                 (scrape-book-cover book-url target-dir)))
         doall)
    (System/exit 0)))
|