Fix broken images with umlauts in the author or the book title field
This commit is contained in:
parent
f85e74bb27
commit
2cb420a98d
1 changed file with 15 additions and 5 deletions
|
@ -35,12 +35,21 @@
|
|||
(http-client/get {:as :byte-array})
|
||||
:body))
|
||||
|
||||
(defn encode-url-part
  "Percent-encodes the string `part` as UTF-8 so it can be used as a single
  URL path segment, and returns the encoded string.

  `java.net.URLEncoder` implements HTML form encoding, which writes a space
  as `+`. Inside a URL path a `+` is a literal plus sign, so it is rewritten
  to `%20` here. A literal `+` in the input has already become `%2B` by that
  point, so every remaining `+` can only stand for a space."
  [part]
  (let [form-encoded (java.net.URLEncoder/encode part "UTF-8")]
    (.replace ^String form-encoded "+" "%20")))
|
||||
|
||||
(defn encode-url
  "Rebuilds a book URL with its author and book-title path segments
  percent-encoded via `encode-url-part`.

  The first `/autor/<author>/<book-title>` occurrence in `url` is extracted
  and re-attached to the fixed prefix `http://lovelybooks.de/autor/`; the
  original host, a trailing slash and anything after the title segment are
  not carried over.

  Returns `url` unchanged when it contains no such path, instead of failing
  with an index-out-of-bounds error on the missing match groups."
  [url]
  (if-let [[_ author book-title] (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url)]
    ;; Only the two captured segments are encoded; the whole-match group is
    ;; never used in the result.
    (str "http://lovelybooks.de/autor/"
         (encode-url-part author)
         "/"
         (encode-url-part book-title))
    url))
|
||||
|
||||
(defn url->file
  "Maps a book URL to the file its cover is stored in below `target-dir`.

  The author and title segments are captured from a URL ending in
  `/autor/<author>/<title>-<digits>-<one char>/`. The name
  `<author>_<title>.jpg` is lower-cased and every character outside
  `a-z`, `0-9`, `-`, `_` and `.` is dropped, so non-ASCII letters such as
  umlauts never reach the file name. Returns the result of
  `(io/file target-dir name)`.

  When the URL does not match, both captures are nil and the name
  degenerates to `_.jpg`."
  [lovelybooks-url target-dir]
  (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)
        file-name (-> (str author "_" title ".jpg")
                      str/lower-case
                      (str/replace #"[^a-z0-9-_.]" ""))]
    (io/file target-dir file-name)))
|
||||
|
||||
(defn scrape-book-urls [github-url]
|
||||
(->> (http-client/get github-url)
|
||||
|
@ -50,9 +59,10 @@
|
|||
(remove nil?)))
|
||||
|
||||
(defn scrape-book-cover
  "Downloads the cover image for the book at `url` and writes it to the file
  that `url->file` derives from `url` and `target-dir`.

  The image is requested through the percent-encoded form of the URL
  (`encode-url`), while the target file name is still derived from the
  original `url`. Returns nil."
  [url target-dir]
  (let [target-file (url->file url target-dir)
        encoded-url (encode-url url)
        ;; Fetch before opening the output stream: opening it creates the
        ;; file, so a failed download would otherwise leave an empty cover
        ;; behind that an existence check treats as already downloaded.
        image-bytes (imgurl->bytes encoded-url)]
    (with-open [out (io/output-stream target-file)]
      (.write out image-bytes))))
|
||||
|
||||
(defn find-missing-covers [books-url target-dir]
|
||||
(remove #(fs/exists? (url->file % target-dir))
|
||||
|
|
Loading…
Reference in a new issue