Fix broken images with umlauts in the author or the book title field

This commit is contained in:
Aaron Mueller 2014-06-10 23:09:36 +02:00
parent f85e74bb27
commit 2cb420a98d

View file

@ -35,12 +35,21 @@
(http-client/get {:as :byte-array})
:body))
(defn encode-url-part
  "Percent-encodes `part` as UTF-8 so it can be used as one URL path segment.

  java.net.URLEncoder implements application/x-www-form-urlencoded encoding,
  which writes a space as `+`. Inside a URL path a `+` is a literal plus sign,
  so the encoder output is post-processed to use `%20` for spaces instead.
  A literal `+` in the input is already emitted as `%2B` by the encoder, so
  the replacement cannot corrupt it.

  Returns the encoded string."
  [part]
  (let [^String form-encoded (java.net.URLEncoder/encode part "UTF-8")]
    ;; Every `+` left in the encoder output stands for a space.
    (.replace form-encoded "+" "%20")))
(defn encode-url
  "Rebuilds a lovelybooks book URL with the author and book-title path
  segments URL-encoded via `encode-url-part`.

  The first `/autor/<author>/<book-title>` occurrence in `url` is located
  with an unanchored regex. The result always uses the fixed prefix
  `http://lovelybooks.de/autor/`; whatever surrounded the two segments in the
  input (scheme, host, trailing slash, further path) is not carried over.

  When `url` does not contain that pattern it is returned unchanged, instead
  of failing with an IndexOutOfBoundsException while picking the segments
  out of an empty match.

  Segments that are already percent-encoded get encoded a second time, so
  pass the raw URL."
  [url]
  (if-let [[_ author book-title] (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url)]
    ;; Only the two captured segments are encoded; the whole-match element
    ;; of the re-find result is not needed.
    (str "http://lovelybooks.de/autor/"
         (encode-url-part author)
         "/"
         (encode-url-part book-title))
    url))
;; Builds the local cover-image file for a lovelybooks book URL.
;; The regex captures the author path segment and the title part that
;; precedes a trailing "-<digits>-<one char>/" suffix. Both are joined as
;; "<author>_<title>.jpg", lower-cased, stripped of every character matched by
;; [^a-z0-9-_.], and combined with target-dir through io/file.
;; (str/ and io/ are presumably clojure.string and clojure.java.io; the ns
;; form is outside this view, so verify.)
;; If the URL does not match, author and title are nil and the name collapses
;; to "_.jpg".
;; NOTE(review): the three threading steps below appear twice with identical
;; text. This looks like the removed and the re-added (re-indented) lines of
;; the diff shown without +/- markers; confirm against the repository before
;; treating this span as source.
(defn url->file [lovelybooks-url target-dir]
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
(-> (str author "_" title ".jpg")
str/lower-case
(str/replace #"[^a-z0-9-_.]" "")
(#(io/file target-dir %)))))
str/lower-case
(str/replace #"[^a-z0-9-_.]" "")
(#(io/file target-dir %)))))
(defn scrape-book-urls [github-url]
(->> (http-client/get github-url)
@ -50,9 +59,10 @@
(remove nil?)))
;; Writes the bytes returned by imgurl->bytes to the file that url->file
;; derives from url and target-dir. The output stream is opened with with-open,
;; so it is closed when the body finishes.
;; (imgurl->bytes is defined outside this view; presumably it fetches the
;; cover image over HTTP, so verify.)
;; NOTE(review): this view shows both variants of two lines without +/-
;; markers: a single-binding `let` next to one that also binds encoded-url,
;; and a `.write` of (imgurl->bytes url) next to one of
;; (imgurl->bytes encoded-url). Going by the commit title, the encoded-url
;; variants are presumably the added lines; confirm against the repository.
(defn scrape-book-cover [url target-dir]
(let [target-file (url->file url target-dir)]
(let [target-file (url->file url target-dir)
encoded-url (encode-url url)]
(with-open [out (io/output-stream target-file)]
(.write out (imgurl->bytes url)))))
(.write out (imgurl->bytes encoded-url)))))
(defn find-missing-covers [books-url target-dir]
(remove #(fs/exists? (url->file % target-dir))