Make this project a LOT simpler

This commit is contained in:
Aaron Fischer 2021-03-10 00:52:59 +01:00
parent 32fbe277aa
commit 17d8fc8ea6
8 changed files with 39 additions and 154 deletions

View file

@ -1,19 +0,0 @@
# About
Scrape the book list from the "Book of the month" project and generate a nice
looking visual representation of the book covers.
# Building
lein uberjar
java -jar target/buchdesmonats-1.7-standalone.jar
cp target/buchdesmonats-1.7-standalone.jar buchdesmonats-1.7.jar
docker build -t buchdesmonats .
# Running
Run this periodically:
docker run --rm -v "$PWD/public:/buchdesmonats/public/" buchdesmonats

View file

@ -9,7 +9,7 @@ list. This includes books and comics.
## Usage
$ target/buchdesmonats-1.7-standalone.jar [book|comic]
$ java -jar target/buchdesmonats-2.0-standalone.jar [book|comic]
## Docker
@ -19,12 +19,25 @@ the container stops. The Dockerfile is a multi stage dockerfile, which
first compiles the Clojure files into a standalone jar file and then uses
it to generate the book of the month content.
$ docker build . -t bdm:1.7
$ docker build . -t bdm:latest
Run this periodically:
$ docker run -it -v "$PWD/public:/app/public" -e "TYPE=book" bdm:1.7
$ docker run -it -v "$PWD/public:/app/public" -e "TYPE=comic" bdm:1.7
$ docker run -it -v "$PWD/public:/app/public" -e "TYPE=book" bdm:latest
$ docker run -it -v "$PWD/public:/app/public" -e "TYPE=comic" bdm:latest
If you want to run this in a cronjob, remove the ```-t``` from the docker run
command, because we do not have a tty.
## Update
$ git checkout -- .
$ git pull
$ docker build . -t bdm:latest
Keep in mind that the ```git checkout -- .``` is needed because the bdm.jar file
itself will manipulate some of the templates in ```public/```, which would
otherwise conflict with the version in git.
## Authors

View file

@ -1,32 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>okoyono.de -- Buch des Monats</title>
<link rel="stylesheet" href="book.css">
<script src="vendors/jquery-2.1.0.min.js"></script>
<script src="vendors/masonry-3.1.5.pkgd.min.js"></script>
<script src="vendors/imagesloaded-3.1.4.pkgd.min.js"></script>
<script src="main.js"></script>
</head>
<body>
<h1>Buch des Monats</h1>
<p>Handerlesen und für gut befunden seit 2010
von <a href="https://social.okoyono.de/@mezzo" rel="author">Michael
Reutter</a>. Jeden Monat ein neues Buch aus seiner Sammlung zu den
Themengebieten Netzkultur, Geektum, Computerspiele und Cyberpunk. Diese
Bücherliste ist mittlerweile Anlaufstelle für so manchen Leser der nach
neuem Stoff sucht. Die Buchcover stammen
von <a href="http://lovelybooks.de/">Lovely
Books</a>, <a href="https://git.okoyono.de/mezzo/buch_des_monats">der
Code</a> von <a href="https://aaron-fischer.net/">Aaron Fischer</a>.
Ein <a href="https://okoyono.de/">økoyono</a> Projekt.</p>
<div id="covers">
<div class="cover-item">
<a href="#">
<img src="#" alt="LovelyBooks cover" title="Book title">
</a>
</div>
</body>
</html>

View file

@ -1,29 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>okoyono.de -- Comic des Monats</title>
<link rel="stylesheet" href="comic.css">
<script src="vendors/jquery-2.1.0.min.js"></script>
<script src="vendors/masonry-3.1.5.pkgd.min.js"></script>
<script src="vendors/imagesloaded-3.1.4.pkgd.min.js"></script>
<script src="main.js"></script>
</head>
<body>
<h1>Comic des Monats</h1>
<p>Handerlesen und für gut befunden seit 2018
von <a href="https://social.okoyono.de/@mezzo" rel="author">Michael
Reutter</a>. Jeden Monat ein neuer Comic aus seiner Sammlung. Die Buchcover stammen
von <a href="http://lovelybooks.de/">Lovely
Books</a>, <a href="https://git.okoyono.de/mezzomix/buch_des_monats">der
Code</a> von <a href="https://aaron-fischer.net/">Aaron Fischer</a>.
Ein <a href="https://okoyono.de/">økoyono</a> Projekt.</p>
<div id="covers">
<div class="cover-item">
<a href="#">
<img src="#" alt="LovelyBooks cover" title="Comic title">
</a>
</div>
</body>
</html>

View file

@ -20,29 +20,26 @@
[clojure.string :as str]
[clojure.java.io :as io]
[clojure.tools.logging :as log]
[me.raynes.fs :as fs]
[buchdesmonats.hash :as hash]
[buchdesmonats.sources.mojoreads :as mojoreads]
[buchdesmonats.sources.lovelybooks :as lovelybooks]))
[me.raynes.fs :as fs]))
(defn bookurl->imageurl
  "Resolves the cover image URL for the book page at `bookurl`.

  Dispatches on the host name of `bookurl`: lovelybooks hosts are handled by
  `lovelybooks/find-cover-image`, mojoreads hosts by
  `mojoreads/find-cover-image`. The resolved URL is logged and returned.
  A host that matches none of the clauses makes `case` throw, since there is
  no default branch."
  [bookurl]
  (let [host (.getHost (java.net.URL. bookurl))
        cover-url (case host
                    ("www.lovelybooks.de" "lovelybooks.de")
                    (lovelybooks/find-cover-image bookurl)

                    ("mojoreads.com" "mojoreads.de")
                    (mojoreads/find-cover-image bookurl))]
    (log/info "Using cover URL: " cover-url " ...")
    cover-url))
(defn bookurl->isbn
  "Returns the first run of decimal digits found in `url`, or nil when `url`
  contains no digit. Callers treat this digit run as the book's ISBN."
  [url]
  (let [digit-run #"[0-9]+"]
    (re-find digit-run url)))
(defn isbn->imageurl
  "Builds the cover image URL on medien.ubitweb.de for `isbn`.

  The string is split into three 3-character path segments followed by the
  remainder (from index 9 on), which becomes the .jpg file name. `subs`
  throws when `isbn` is shorter than 9 characters."
  [isbn]
  (format "https://medien.ubitweb.de/bildzentrale_original/%s/%s/%s/%s.jpg"
          (subs isbn 0 3)
          (subs isbn 3 6)
          (subs isbn 6 9)
          (subs isbn 9)))
(defn imgurl->bytes [url]
(let [url-to-fetch (bookurl->imageurl url)
(let [isbn (bookurl->isbn url)
url-to-fetch (isbn->imageurl isbn)
stream (http-client/get url-to-fetch {:as :byte-array})]
(:body stream)))
; TODO: isbn statt hash verwenden
(defn url->file [url target-dir]
(io/file target-dir (str (hash/md5 url) ".jpg")))
(io/file target-dir (str (bookurl->isbn url) ".jpg")))
(defn scrape-book-urls [datasource-url]
(->> (http-client/get datasource-url {:insecure? true})
@ -64,22 +61,22 @@
(remove #(fs/exists? (url->file % target-dir))
(scrape-book-urls books-url)))
(defn cover-item-model-for-type [type]
(html/defsnippet cover-item-model (str "buchdesmonats/" type ".html") [:div#covers :> :div]
(defn cover-item-model-for-type [public-dir type]
(html/defsnippet cover-item-model (io/file public-dir (str type ".html")) [:div#covers :> :div]
[link title]
[:a] (html/set-attr :href link)
[:img] (html/set-attr :src (url->file link (str type "-covers")) :title title)))
(defn template-for-type [type]
(html/deftemplate book-template (str "buchdesmonats/" type ".html")
(defn template-for-type [public-dir type]
(html/deftemplate book-template (io/reader (io/file public-dir (str type ".html")))
[cover-urls]
[:#covers] (html/content
(map #((cover-item-model-for-type type) % "zu Lovely Books")
(map #((cover-item-model-for-type public-dir type) % "zu Lovely Books")
cover-urls))))
(defn generate-html [type book-urls target-dir]
(let [content (apply str ((template-for-type type) book-urls))]
(with-open [out (io/writer (io/file target-dir (str type ".html")))]
(defn generate-html [type book-urls public-dir]
(let [content (apply str ((template-for-type public-dir type) book-urls))]
(with-open [out (io/writer (io/file public-dir (str type ".html")))]
(.write out content))))
(defn -main [& args]
@ -88,7 +85,7 @@
(System/exit 1)))
(let [type (first args)
datasource-url (str "https://git.okoyono.de/mezzo/buch_des_monats/raw/master/" (clojure.string/upper-case type) ".mkd")
target-dir (io/file "public" (str type "-covers"))]
target-dir (io/file "public" (str type "-covers/"))]
(fs/mkdirs target-dir)
(generate-html type (scrape-book-urls datasource-url) "public")
(doall (pmap #(scrape-book-cover % target-dir)

View file

@ -1,11 +0,0 @@
(ns buchdesmonats.hash
(:import [java.security MessageDigest]
[java.math BigInteger]))
;;; Stolen from: https://gist.github.com/jizhang/4325757#gistcomment-2633984
(defn md5
  "Returns the MD5 digest of the string `s` as a 32-character, zero-padded,
  lower-case hexadecimal string.

  The string is encoded as UTF-8 before hashing so that the result does not
  depend on the JVM's platform default charset."
  [^String s]
  (->> (.getBytes s "UTF-8")
       (.digest (MessageDigest/getInstance "MD5"))
       ;; signum 1: interpret the digest bytes as a non-negative integer
       (BigInteger. 1)
       (format "%032x")))

View file

@ -1,21 +0,0 @@
(ns buchdesmonats.sources.lovelybooks
(:require [net.cgrand.enlive-html :as html]
[clojure.string :as str]))
(defn encode-url-part
  "URL-encodes the single string `part` with java.net.URLEncoder using UTF-8."
  [part]
  (let [charset "UTF-8"]
    (java.net.URLEncoder/encode part charset)))
(defn encode-url
  "Rebuilds a lovelybooks.de author/book URL from `url` with both path
  segments URL-encoded.

  The author and book title are the two segments following `/autor/` in
  `url`. When `url` does not contain that pattern there are no segments to
  pick from and `nth` throws."
  [url]
  (let [encoded (mapv encode-url-part
                      (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url))]
    ;; index 0 is the whole match; 1 and 2 are the captured segments
    (str "https://lovelybooks.de/autor/" (nth encoded 1) "/" (nth encoded 2))))
(defn find-cover-image
  "Fetches the lovelybooks page for `url` and returns a cover image URL.

  The page is loaded from the encoded form of `url`, the first
  `img.ResponsiveImage.BookCover` element is selected, and its `srcset`
  attribute is split on spaces. The last entry that looks like an http(s)
  URL ending in `.jpg` is returned, or nil when no entry matches."
  [url]
  (let [page (html/html-resource (java.net.URL. (encode-url url)))
        cover-img (first (html/select page [:img.ResponsiveImage.BookCover]))
        candidates (str/split (get-in cover-img [:attrs :srcset]) #" ")]
    (last (filter #(re-matches #"http.+\.jpg" %) candidates))))

View file

@ -1,13 +0,0 @@
(ns buchdesmonats.sources.mojoreads)
(defn find-cover-image
  "Derives the cover image URL on medien.ubitweb.de from a mojoreads `url`.

  The first run of digits in `url` is taken as the ISBN; it is split into
  three 3-character path segments plus the remainder, which becomes the
  .jpg file name."
  [url]
  (let [isbn (re-find #"[0-9]+" url)]
    (format "https://medien.ubitweb.de/bildzentrale_original/%s/%s/%s/%s.jpg"
            (subs isbn 0 3)
            (subs isbn 3 6)
            (subs isbn 6 9)
            (subs isbn 9))))