Generalize codebase and add more datasources.
Fixes #9. Relates to #8. * Support mojoreads as a new data source * Change image file names to hash sums to be more generic * Clean up the source code by separating related code into its own files * Fix the Chaos Monkeys book entry
This commit is contained in:
parent
dd183627df
commit
08f504f303
6 changed files with 82 additions and 43 deletions
2
BOOK.mkd
2
BOOK.mkd
|
@ -24,7 +24,7 @@
|
|||
* September: [Andreas Eschbach "Das größte Abenteuer"](https://www.lovelybooks.de/autor/Andreas-Eschbach/Perry-Rhodan-Das-gr%C3%B6%C3%9Fte-Abenteuer-1955713268-w/)
|
||||
* Oktober: [Christiane-Frohmann "Präraffaelitische Girls erklären das Internet"](https://www.lovelybooks.de/autor/Christiane-Frohmann/Pr%C3%A4raffaelitische-Girls-erkl%C3%A4ren-das-Internet-1499687993-w/)
|
||||
* November: [Helen Stelthove "Pretty Fly For A Wifi"](https://www.lovelybooks.de/autor/Helen-Stelthove/Pretty-Fly-For-A-Wifi-2032951731-w/)
|
||||
* Dezember: [Antonio Garcia Martinez "Chaos Monkeys"](https://www.lovelybooks.de/autor/Antonio-Garcia-Martinez/Chaos-Monkeys-1565378792-w/)
|
||||
* Dezember: [Antonio Garcia Martinez "Chaos Monkeys"](https://mojoreads.de/book/Chaos-MonkeysAntonio-Garcia-Martinez/361610)
|
||||
|
||||
## 2018
|
||||
|
||||
|
|
14
public/index.html
Normal file
14
public/index.html
Normal file
|
@ -0,0 +1,14 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>okoyono.de -- Buch des Monats</title>
|
||||
<link rel="stylesheet" href="book.css" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>Buch des Monats</h1>
|
||||
|
||||
<a href="comic.html">Comic des Monats</a>
|
||||
<a href="book.html">Buch des Monats</a>
|
||||
</body>
|
||||
</html>
|
|
@ -1,16 +1,12 @@
|
|||
;;; Copyright (C) 2014-2019 Aaron Fischer <mail@aaron-fischer.net>
|
||||
;;;
|
||||
;;; Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
;;; this software and associated documentation files (the "Software"), to deal in
|
||||
;;; the Software without restriction, including without limitation the rights to
|
||||
;;; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
;;; the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
;;; subject to the following conditions:
|
||||
;;;
|
||||
;;; The above copyright notice and this permission notice shall be included in all
|
||||
;;; copies or substantial portions of the Software.
|
||||
;;;
|
||||
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
;;; Copyright (C) 2014-2020 Aaron Fischer <mail@aaron-fischer.net> Permission
|
||||
;;; is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
;;; software and associated documentation files (the "Software"), to deal in the
|
||||
;;; Software without restriction, including without limitation the rights to
|
||||
;;; use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
;;; copies of the Software, and to permit persons to whom the Software is
|
||||
;;; furnished to do so, subject to the following conditions: The above
|
||||
;;; copyright notice and this permission notice shall be included in all copies
|
||||
;;; or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
;;; FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
;;; COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
|
@ -24,35 +20,26 @@
|
|||
[clojure.string :as str]
|
||||
[clojure.java.io :as io]
|
||||
[clojure.tools.logging :as log]
|
||||
[me.raynes.fs :as fs]))
|
||||
[me.raynes.fs :as fs]
|
||||
[buchdesmonats.hash :as hash]
|
||||
[buchdesmonats.sources.mojoreads :as mojoreads]
|
||||
[buchdesmonats.sources.lovelybooks :as lovelybooks]))
|
||||
|
||||
;; TODO: Make it compatible to other book cover sites
|
||||
(defn imgurl->bytes [lovelybooks-url]
|
||||
(let [urls (-> (java.net.URL. lovelybooks-url)
|
||||
html/html-resource
|
||||
(html/select [:img.ResponsiveImage.BookCover])
|
||||
first
|
||||
(get-in [:attrs :srcset])
|
||||
(str/split #" "))
|
||||
url-to-fetch (last (filter #(re-matches #"http.+\.jpg" %) urls))
|
||||
(defn bookurl->imageurl
  "Resolve the cover image URL for the book page at `bookurl`.

  The host name of `bookurl` selects the data source whose
  `find-cover-image` is called with the unmodified book URL. The `case`
  has no default clause, so a host that matches none of the listed names
  raises an IllegalArgumentException."
  [bookurl]
  (let [host (.getHost (java.net.URL. bookurl))]
    (case host
      ;; A list of constants shares one result expression.
      ("www.lovelybooks.de" "lovelybooks.de") (lovelybooks/find-cover-image bookurl)
      ("mojoreads.com" "mojoreads.de") (mojoreads/find-cover-image bookurl))))
|
||||
|
||||
(defn imgurl->bytes
  "Fetch the cover image that belongs to the book page `url`.

  Despite the name, `url` is a book page URL: it is first resolved to an
  image URL with `bookurl->imageurl`, and that image URL is then requested
  through `http-client/get` with `{:as :byte-array}`. Returns the `:body`
  of the response."
  [url]
  (-> (bookurl->imageurl url)
      (http-client/get {:as :byte-array})
      :body))
|
||||
|
||||
(defn encode-url-part [part]
|
||||
(java.net.URLEncoder/encode part "UTF-8"))
|
||||
|
||||
(defn encode-url [url]
|
||||
(let [parts (map encode-url-part (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url))
|
||||
author (nth parts 1)
|
||||
book-title (nth parts 2)]
|
||||
(str "https://lovelybooks.de/autor/" author "/" book-title)))
|
||||
|
||||
(defn url->file [lovelybooks-url target-dir]
|
||||
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
|
||||
(-> (str author "_" title ".jpg")
|
||||
str/lower-case
|
||||
(str/replace #"[^a-z0-9-_.]" "")
|
||||
(#(io/file target-dir %)))))
|
||||
(defn url->file
  "Return the cover file for `url` inside `target-dir`.

  The file name is the MD5 hex digest of `url` (from `hash/md5`) with the
  extension \".jpg\" appended."
  [url target-dir]
  (let [file-name (str (hash/md5 url) ".jpg")]
    (io/file target-dir file-name)))
|
||||
|
||||
(defn scrape-book-urls [datasource-url]
|
||||
(->> (http-client/get datasource-url {:insecure? true})
|
||||
|
@ -64,13 +51,11 @@
|
|||
(defn scrape-book-cover
  "Download the cover for the book page `url` and store it in `target-dir`.

  The target file is determined by `url->file` and the image bytes are
  obtained from `imgurl->bytes`. This is a best-effort operation: any
  Exception is logged at info level together with its message, and the
  book is skipped."
  [url target-dir]
  (try
    (let [target-file (url->file url target-dir)
          image-bytes (imgurl->bytes url)]
      (with-open [out (io/output-stream target-file)]
        (.write out image-bytes)))
    (catch Exception e
      ;; `e` is a Throwable, not a map, so `get-in` on it always yields nil.
      ;; Read the message through the Throwable API instead.
      (log/info "Problem with " url ":" (.getMessage e) ". Skip it."))))
|
||||
|
||||
(defn find-missing-covers [books-url target-dir]
|
||||
(remove #(fs/exists? (url->file % target-dir))
|
||||
|
|
11
src/buchdesmonats/hash.clj
Normal file
11
src/buchdesmonats/hash.clj
Normal file
|
@ -0,0 +1,11 @@
|
|||
(ns buchdesmonats.hash
|
||||
(:import [java.security MessageDigest]
|
||||
[java.math BigInteger]))
|
||||
|
||||
;;; Stolen from: https://gist.github.com/jizhang/4325757#gistcomment-2633984
|
||||
(defn md5
  "Return the MD5 digest of `s` as a 32 character, zero-padded, lower-case
  hex string.

  `s` is encoded as UTF-8 before hashing. Without an explicit charset
  `.getBytes` uses the platform default encoding, so the same string with
  non-ASCII characters could hash differently on different machines."
  [^String s]
  (->> (.getBytes s "UTF-8")
       (.digest (MessageDigest/getInstance "MD5"))
       ;; Signum 1 treats the digest as an unsigned magnitude.
       (BigInteger. 1)
       ;; %032x keeps leading zeros so the result is always 32 characters.
       (format "%032x")))
|
21
src/buchdesmonats/sources/lovelybooks.clj
Normal file
21
src/buchdesmonats/sources/lovelybooks.clj
Normal file
|
@ -0,0 +1,21 @@
|
|||
(ns buchdesmonats.sources.lovelybooks
|
||||
(:require [net.cgrand.enlive-html :as html]
|
||||
[clojure.string :as str]))
|
||||
|
||||
(defn encode-url-part
  "Encode the single URL path segment `part` with java.net.URLEncoder,
  using UTF-8."
  [part]
  (java.net.URLEncoder/encode part "UTF-8"))

(defn encode-url
  "Rebuild a lovelybooks book URL from `url` with encoded author and title.

  The author and book title are the two path segments that follow
  `/autor/` in `url`. Each is passed through `encode-url-part` and the
  result is `https://lovelybooks.de/autor/<author>/<title>`.

  Throws IllegalArgumentException when `url` has no
  `/autor/<author>/<title>` path."
  [url]
  (let [[_ author book-title] (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url)]
    ;; re-find returns nil when the pattern does not match; fail with a
    ;; message that names the offending URL instead of an index error.
    (when-not (and author book-title)
      (throw (IllegalArgumentException.
              (str "Not a lovelybooks book URL (expected /autor/<author>/<title>): " url))))
    (str "https://lovelybooks.de/autor/"
         (encode-url-part author)
         "/"
         (encode-url-part book-title))))
|
||||
|
||||
(defn find-cover-image
  "Find the cover image URL on the lovelybooks book page `url`.

  `url` is normalised with `encode-url`, the page is parsed, and the
  `srcset` attribute of the first `img.ResponsiveImage.BookCover` element
  is split on single spaces. Returns the last entry that fully matches
  `http.+\\.jpg`, or nil when no entry matches."
  [url]
  (let [page       (html/html-resource (java.net.URL. (encode-url url)))
        cover      (first (html/select page [:img.ResponsiveImage.BookCover]))
        srcset     (get-in cover [:attrs :srcset])
        candidates (str/split srcset #" ")]
    (last (filter #(re-matches #"http.+\.jpg" %) candidates))))
|
8
src/buchdesmonats/sources/mojoreads.clj
Normal file
8
src/buchdesmonats/sources/mojoreads.clj
Normal file
|
@ -0,0 +1,8 @@
|
|||
(ns buchdesmonats.sources.mojoreads
|
||||
(:require [net.cgrand.enlive-html :as html]))
|
||||
|
||||
(defn find-cover-image
  "Find the cover image URL on the mojoreads book page `url`.

  Parses the page and returns the `src` attribute of the first `img`
  element inside `div.mojoreads-page-content-container`, or nil when no
  such element exists."
  [url]
  (let [page (html/html-resource (java.net.URL. url))
        img  (first (html/select page [:div.mojoreads-page-content-container :img]))]
    (get-in img [:attrs :src])))
|
Loading…
Reference in a new issue