Make this project a LOT simpler

This commit is contained in:
Aaron Fischer 2021-03-10 00:52:59 +01:00
parent 32fbe277aa
commit 17d8fc8ea6
8 changed files with 39 additions and 154 deletions

View file

@ -1,19 +0,0 @@
# About
Scrape the book list from the "Book of the month" project and generate a nice
looking visual representation of the book covers.
# Building
lein uberjar
java -jar target/buchdesmonats-1.7-standalone.jar
cp target/buchdesmonats-1.7-standalone.jar buchdesmonats-1.7.jar
docker build -t buchdesmonats .
# Running
Run this periodically:
docker run --rm -v "$PWD/public:/buchdesmonats/public/" buchdesmonats

View file

@ -9,7 +9,7 @@ list. This includes books and comics.
## Usage ## Usage
$ target/buchdesmonats-1.7-standalone.jar [book|comic] $ java -jar target/buchdesmonats-2.0-standalone.jar [book|comic]
## Docker ## Docker
@ -19,12 +19,25 @@ the container stops. The Dockerfile is a multi stage dockerfile, which
first compiles the clojure files into a standalone jar file and then use first compiles the clojure files into a standalone jar file and then use
it to generate the book of the month content. it to generate the book of the month content.
$ docker build . -t bdm:1.7 $ docker build . -t bdm:latest
Run this periodically: Run this periodically:
$ docker run -it -v "$PWD/public:/app/public" -e "TYPE=book" bdm:1.7 $ docker run -it -v "$PWD/public:/app/public" -e "TYPE=book" bdm:latest
$ docker run -it -v "$PWD/public:/app/public" -e "TYPE=comic" bdm:1.7 $ docker run -it -v "$PWD/public:/app/public" -e "TYPE=comic" bdm:latest
If you want to run this in a cronjob, remove the ```-t``` from the docker run
command, because we do not have a tty.
## Update
$ git checkout .
$ git pull
$ docker build . -t bdm:latest
Keep in mind that the ```git checkout .``` is needed because the bdm.jar file
itself modifies some of the templates in ```public/```, which would otherwise
conflict with the version in git.
## Authors ## Authors

View file

@ -1,32 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>okoyono.de -- Buch des Monats</title>
<link rel="stylesheet" href="book.css">
<script src="vendors/jquery-2.1.0.min.js"></script>
<script src="vendors/masonry-3.1.5.pkgd.min.js"></script>
<script src="vendors/imagesloaded-3.1.4.pkgd.min.js"></script>
<script src="main.js"></script>
</head>
<body>
<h1>Buch des Monats</h1>
<p>Handverlesen und für gut befunden seit 2010
von <a href="https://social.okoyono.de/@mezzo" rel="author">Michael
Reutter</a>. Jeden Monat ein neues Buch aus seiner Sammlung zu den
Themengebieten Netzkultur, Geektum, Computerspiele und Cyberpunk. Diese
Bücherliste ist mittlerweile Anlaufstelle für so manchen Leser der nach
neuem Stoff sucht. Die Buchcover stammen
von <a href="http://lovelybooks.de/">Lovely
Books</a>, <a href="https://git.okoyono.de/mezzo/buch_des_monats">der
Code</a> von <a href="https://aaron-fischer.net/">Aaron Fischer</a>.
Ein <a href="https://okoyono.de/">økoyono</a> Projekt.</p>
<div id="covers">
<div class="cover-item">
<a href="#">
<img src="#" alt="LovelyBooks cover" title="Book title">
</a>
</div>
</div>
</body>
</html>

View file

@ -1,29 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>okoyono.de -- Comic des Monats</title>
<link rel="stylesheet" href="comic.css">
<script src="vendors/jquery-2.1.0.min.js"></script>
<script src="vendors/masonry-3.1.5.pkgd.min.js"></script>
<script src="vendors/imagesloaded-3.1.4.pkgd.min.js"></script>
<script src="main.js"></script>
</head>
<body>
<h1>Comic des Monats</h1>
<p>Handverlesen und für gut befunden seit 2018
von <a href="https://social.okoyono.de/@mezzo" rel="author">Michael
Reutter</a>. Jeden Monat ein neuer Comic aus seiner Sammlung. Die Buchcover stammen
von <a href="http://lovelybooks.de/">Lovely
Books</a>, <a href="https://git.okoyono.de/mezzomix/buch_des_monats">der
Code</a> von <a href="https://aaron-fischer.net/">Aaron Fischer</a>.
Ein <a href="https://okoyono.de/">økoyono</a> Projekt.</p>
<div id="covers">
<div class="cover-item">
<a href="#">
<img src="#" alt="LovelyBooks cover" title="Comic title">
</a>
</div>
</div>
</body>
</html>

View file

@ -20,29 +20,26 @@
[clojure.string :as str] [clojure.string :as str]
[clojure.java.io :as io] [clojure.java.io :as io]
[clojure.tools.logging :as log] [clojure.tools.logging :as log]
[me.raynes.fs :as fs] [me.raynes.fs :as fs]))
[buchdesmonats.hash :as hash]
[buchdesmonats.sources.mojoreads :as mojoreads]
[buchdesmonats.sources.lovelybooks :as lovelybooks]))
(defn bookurl->imageurl [bookurl] (defn bookurl->isbn [url]
(let [url (java.net.URL. bookurl) (re-find #"[0-9]+" url))
cover-url (case (.getHost url)
"www.lovelybooks.de" (lovelybooks/find-cover-image bookurl) (defn isbn->imageurl [isbn]
"lovelybooks.de" (lovelybooks/find-cover-image bookurl) (str "https://medien.ubitweb.de/bildzentrale_original/"
"mojoreads.com" (mojoreads/find-cover-image bookurl) (subs isbn 0 3) "/"
"mojoreads.de" (mojoreads/find-cover-image bookurl))] (subs isbn 3 6) "/"
(log/info "Using cover URL: " cover-url " ...") (subs isbn 6 9) "/"
cover-url)) (subs isbn 9) ".jpg"))
(defn imgurl->bytes [url] (defn imgurl->bytes [url]
(let [url-to-fetch (bookurl->imageurl url) (let [isbn (bookurl->isbn url)
url-to-fetch (isbn->imageurl isbn)
stream (http-client/get url-to-fetch {:as :byte-array})] stream (http-client/get url-to-fetch {:as :byte-array})]
(:body stream))) (:body stream)))
; TODO: isbn statt hash verwenden
(defn url->file [url target-dir] (defn url->file [url target-dir]
(io/file target-dir (str (hash/md5 url) ".jpg"))) (io/file target-dir (str (bookurl->isbn url) ".jpg")))
(defn scrape-book-urls [datasource-url] (defn scrape-book-urls [datasource-url]
(->> (http-client/get datasource-url {:insecure? true}) (->> (http-client/get datasource-url {:insecure? true})
@ -64,22 +61,22 @@
(remove #(fs/exists? (url->file % target-dir)) (remove #(fs/exists? (url->file % target-dir))
(scrape-book-urls books-url))) (scrape-book-urls books-url)))
(defn cover-item-model-for-type [type] (defn cover-item-model-for-type [public-dir type]
(html/defsnippet cover-item-model (str "buchdesmonats/" type ".html") [:div#covers :> :div] (html/defsnippet cover-item-model (io/file public-dir (str type ".html")) [:div#covers :> :div]
[link title] [link title]
[:a] (html/set-attr :href link) [:a] (html/set-attr :href link)
[:img] (html/set-attr :src (url->file link (str type "-covers")) :title title))) [:img] (html/set-attr :src (url->file link (str type "-covers")) :title title)))
(defn template-for-type [type] (defn template-for-type [public-dir type]
(html/deftemplate book-template (str "buchdesmonats/" type ".html") (html/deftemplate book-template (io/reader (io/file public-dir (str type ".html")))
[cover-urls] [cover-urls]
[:#covers] (html/content [:#covers] (html/content
(map #((cover-item-model-for-type type) % "zu Lovely Books") (map #((cover-item-model-for-type public-dir type) % "zu Lovely Books")
cover-urls)))) cover-urls))))
(defn generate-html [type book-urls target-dir] (defn generate-html [type book-urls public-dir]
(let [content (apply str ((template-for-type type) book-urls))] (let [content (apply str ((template-for-type public-dir type) book-urls))]
(with-open [out (io/writer (io/file target-dir (str type ".html")))] (with-open [out (io/writer (io/file public-dir (str type ".html")))]
(.write out content)))) (.write out content))))
(defn -main [& args] (defn -main [& args]
@ -88,7 +85,7 @@
(System/exit 1))) (System/exit 1)))
(let [type (first args) (let [type (first args)
datasource-url (str "https://git.okoyono.de/mezzo/buch_des_monats/raw/master/" (clojure.string/upper-case type) ".mkd") datasource-url (str "https://git.okoyono.de/mezzo/buch_des_monats/raw/master/" (clojure.string/upper-case type) ".mkd")
target-dir (io/file "public" (str type "-covers"))] target-dir (io/file "public" (str type "-covers/"))]
(fs/mkdirs target-dir) (fs/mkdirs target-dir)
(generate-html type (scrape-book-urls datasource-url) "public") (generate-html type (scrape-book-urls datasource-url) "public")
(doall (pmap #(scrape-book-cover % target-dir) (doall (pmap #(scrape-book-cover % target-dir)

View file

@ -1,11 +0,0 @@
(ns buchdesmonats.hash
(:import [java.security MessageDigest]
[java.math BigInteger]))
;;; Stolen from: https://gist.github.com/jizhang/4325757#gistcomment-2633984
(defn md5
  "Returns the MD5 digest of the string `s` as a 32-character, zero-padded,
  lower-case hexadecimal string.

  `s` is always encoded as UTF-8 before hashing, so the result does not
  depend on the JVM's platform default charset."
  [^String s]
  (let [digest (.digest (MessageDigest/getInstance "MD5")
                        (.getBytes s "UTF-8"))]
    ;; Signum 1 makes BigInteger read the digest as an unsigned magnitude;
    ;; %032x puts back the leading zeros BigInteger would otherwise drop.
    (format "%032x" (BigInteger. 1 digest))))

View file

@ -1,21 +0,0 @@
(ns buchdesmonats.sources.lovelybooks
(:require [net.cgrand.enlive-html :as html]
[clojure.string :as str]))
(defn encode-url-part
  "Encodes a single URL component `part` with java.net.URLEncoder using
  UTF-8 (form encoding, so a space is emitted as `+`)."
  [part]
  (-> part
      (java.net.URLEncoder/encode "UTF-8")))
(defn encode-url
  "Rebuilds a LovelyBooks book URL with its author and title path segments
  URL-encoded.

  `url` is expected to contain `/autor/<author>/<title>`; the result is
  `https://lovelybooks.de/autor/<encoded author>/<encoded title>`.
  When `url` does not contain such a path it is returned unchanged, rather
  than failing with an IndexOutOfBoundsException on the missing groups."
  [url]
  ;; re-find yields [full-match author title] on success and nil otherwise;
  ;; only the two capture groups need encoding.
  (if-let [[_ author book-title] (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url)]
    (str "https://lovelybooks.de/autor/"
         (encode-url-part author)
         "/"
         (encode-url-part book-title))
    url))
(defn find-cover-image
  "Fetches the LovelyBooks page for `url` (after passing it through
  `encode-url`) and returns a cover image URL.

  The first `img.ResponsiveImage.BookCover` element is selected, its
  `srcset` attribute is split on spaces, and the last entry matching
  `http....jpg` is returned (nil when no entry matches)."
  [url]
  (let [page       (html/html-resource (java.net.URL. (encode-url url)))
        cover-img  (first (html/select page [:img.ResponsiveImage.BookCover]))
        srcset     (get-in cover-img [:attrs :srcset])
        candidates (str/split srcset #" ")
        jpg-url?   (fn [candidate] (re-matches #"http.+\.jpg" candidate))]
    (last (filter jpg-url? candidates))))

View file

@ -1,13 +0,0 @@
(ns buchdesmonats.sources.mojoreads)
(defn find-cover-image
  "Derives the cover image URL for a book from the ISBN embedded in `url`.

  The ISBN is taken to be the first run of at least ten digits in `url`, so
  shorter numbers elsewhere in the URL (e.g. a year) are skipped. Its digits
  are split into path segments of 3/3/3/rest below
  `https://medien.ubitweb.de/bildzentrale_original/` with a `.jpg` suffix.

  Returns nil when `url` is nil or contains no such digit run."
  [url]
  ;; Requiring >= 10 digits guarantees every `subs` call below is in range.
  (when-let [isbn (some->> url (re-find #"[0-9]{10,}"))]
    (str "https://medien.ubitweb.de/bildzentrale_original/"
         (subs isbn 0 3) "/"
         (subs isbn 3 6) "/"
         (subs isbn 6 9) "/"
         (subs isbn 9) ".jpg")))