buch_des_monats/src/buchdesmonats/core.clj
2014-03-25 23:55:29 +01:00

85 lines
3.4 KiB
Clojure

;;; Copyright (C) 2014 Aaron Mueller <mail@aaron-mueller.de>
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a copy of
;;; this software and associated documentation files (the "Software"), to deal in
;;; the Software without restriction, including without limitation the rights to
;;; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
;;; the Software, and to permit persons to whom the Software is furnished to do so,
;;; subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in all
;;; copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
;;; FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
;;; COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
;;; IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
;;; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
(ns buchdesmonats.core
(:gen-class)
(:require [net.cgrand.enlive-html :as html]
[clj-http.client :as http-client]
[clojure.string :as str]
[clojure.java.io :as io]
[me.raynes.fs :as fs]))
(defn imgurl->bytes
  "Fetches the cover image referenced on a book page and returns the body of
  the image response, requested as a byte array.

  lovelybooks-url - address of the book page, given as a string.

  The page is parsed with Enlive and the first element matched by the selector
  div.bookcoverXXL > div > img is used. Its src attribute gets the scheme
  prefix http: prepended before the image itself is requested."
  [lovelybooks-url]
  (let [page      (html/html-resource (java.net.URL. lovelybooks-url))
        cover-img (first (html/select page [:div.bookcoverXXL :> :div :> :img]))
        cover-src (get-in cover-img [:attrs :src])
        response  (http-client/get (str "http:" cover-src) {:as :byte-array})]
    (:body response)))
(defn url->file
  "Maps a book page URL to the java.io.File of its cover image inside
  target-dir.

  The author and title segments are extracted from the URL path, joined as
  author_title.jpg, lower-cased, and stripped of every character other than
  a-z, 0-9, hyphen, underscore and dot. When the URL does not match the
  expected pattern both segments are nil and the resulting name is _.jpg."
  [lovelybooks-url target-dir]
  (let [url-pattern      #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$"
        [_ author title] (re-find url-pattern lovelybooks-url)
        raw-name         (str author "_" title ".jpg")
        safe-name        (str/replace (str/lower-case raw-name) #"[^a-z0-9-_.]" "")]
    (io/file target-dir safe-name)))
(defn scrape-book-urls
  "Downloads the document at github-url and returns a lazy sequence of the
  link targets found on its bullet lines.

  Only lines that start with an asterisk followed by a space and contain a
  Markdown link of the form [text](target) contribute a result; all other
  lines are skipped."
  [github-url]
  (let [link-pattern #"^\* .*\[.+\]\((.+)\)"
        page-lines   (str/split-lines (:body (http-client/get github-url)))]
    ;; keep drops the nil produced for lines without a matching link.
    (keep #(second (re-find link-pattern %)) page-lines)))
(defn scrape-book-cover
  "Downloads the cover image for the book page at url and stores it in
  target-dir under the file name derived by url->file.

  url        - address of the book page, given as a string
  target-dir - directory that receives the image file

  Returns nil. Any exception raised while downloading propagates to the
  caller before the target file is touched."
  [url target-dir]
  (let [target-file (url->file url target-dir)
        ;; Download before opening the stream: opening it creates the file, so
        ;; a failed download would otherwise leave an empty file behind that
        ;; find-missing-covers treats as an already fetched cover.
        image-bytes (imgurl->bytes url)]
    (with-open [out (io/output-stream target-file)]
      (.write out image-bytes))))
(defn find-missing-covers
  "Returns a lazy sequence of the book URLs listed at books-url whose cover
  file, as named by url->file, does not exist in target-dir."
  [books-url target-dir]
  (let [cover-present? (fn [book-url]
                         (fs/exists? (url->file book-url target-dir)))]
    (filter (complement cover-present?) (scrape-book-urls books-url))))
;; Enlive snippet for a single cover entry. It is built from the div elements
;; that are direct children of div#covers in buchdesmonats/layout.html and
;; takes the book page `link` and a `title` text.
(html/defsnippet cover-item-model "buchdesmonats/layout.html" [:div#covers :> :div]
[link title]
;; Point the anchor at the book page.
[:a] (html/set-attr :href link)
;; The image source is the cover path below "book-covers" as derived by
;; url->file; the title attribute is set from `title`.
;; NOTE(review): url->file returns a java.io.File rather than a string --
;; presumably Enlive stringifies it when rendering; confirm the result is a
;; usable URL on platforms whose path separator is not a forward slash.
[:img] (html/set-attr :src (url->file link "book-covers") :title title))
;; Enlive template for the whole page, based on buchdesmonats/layout.html.
;; `cover-urls` is a collection of book page links. The content of the element
;; with id "covers" is set to one cover-item-model snippet per link, each
;; created with the fixed title "zu Lovely Books".
(html/deftemplate index-template "buchdesmonats/layout.html"
[cover-urls]
[:#covers] (html/content
(map #(cover-item-model % "zu Lovely Books")
cover-urls)))
(defn generate-html
  "Renders the cover overview page for book-urls and writes it to index.html
  inside target-dir.

  book-urls  - collection of book page links handed to index-template
  target-dir - directory that receives index.html

  Returns nil."
  [book-urls target-dir]
  (let [content (apply str (index-template book-urls))]
    ;; Write into the directory the caller asked for rather than a hard-coded
    ;; \"public\" folder, so the target-dir argument is actually honoured.
    (spit (io/file target-dir "index.html") content)))
(defn -main
  "Command line entry point; args are ignored.

  Creates the public/book-covers directory, writes the overview page for the
  books listed in the wiki document, downloads every cover that is not yet
  present using pmap, and finally exits the JVM with status 0."
  [& args]
  (let [github-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"
        target-dir (io/file "public" "book-covers")
        download!  (fn [book-url] (scrape-book-cover book-url target-dir))]
    (fs/mkdirs target-dir)
    (-> github-url
        scrape-book-urls
        (generate-html "public"))
    ;; Force the pmap sequence so that every download actually runs.
    (->> (find-missing-covers github-url target-dir)
         (pmap download!)
         dorun)
    (System/exit 0)))