Fix broken images with umlauts in the author or the book title field

This commit is contained in:
Aaron Mueller 2014-06-10 23:09:36 +02:00
parent f85e74bb27
commit 2cb420a98d

View file

@ -35,6 +35,15 @@
(http-client/get {:as :byte-array}) (http-client/get {:as :byte-array})
:body)) :body))
(defn encode-url-part
  "Percent-encodes `part` as UTF-8 so it can be used as one URL path segment.

  java.net.URLEncoder implements HTML form encoding, which writes a space as
  `+`. Inside a URL path `+` is a literal plus sign, so every `+` in the
  encoder output is rewritten to `%20`. A literal plus in the input is not
  affected: the encoder has already turned it into `%2B`.

  Returns the encoded string."
  [part]
  (-> (java.net.URLEncoder/encode part "UTF-8")
      (.replace "+" "%20")))
(defn encode-url
  "Rebuilds a lovelybooks book URL with the author and book-title path
  segments percent-encoded via `encode-url-part`.

  The first `/autor/<author>/<book-title>` occurrence in `url` is extracted
  and the result is always
  `http://lovelybooks.de/autor/<encoded author>/<encoded book-title>`:
  the original scheme and host, a trailing slash and anything after the
  book-title segment are not carried over.

  When `url` contains no such path there is nothing to encode and `url` is
  returned unchanged, instead of failing with an IndexOutOfBoundsException
  while picking the captures out of an empty match."
  [url]
  ;; Only the two capture groups are encoded; the whole-match group is ignored.
  (if-let [[_ author book-title] (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url)]
    (str "http://lovelybooks.de/autor/"
         (encode-url-part author)
         "/"
         (encode-url-part book-title))
    url))
(defn url->file [lovelybooks-url target-dir] (defn url->file [lovelybooks-url target-dir]
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)] (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
(-> (str author "_" title ".jpg") (-> (str author "_" title ".jpg")
@ -50,9 +59,10 @@
(remove nil?))) (remove nil?)))
;; NOTE(review): this is a side-by-side diff rendering of scrape-book-cover.
;; On each line the first copy appears to be the pre-commit text and the
;; second copy the post-commit text - confirm against the repository.
;;
;; Both versions compute target-file with (url->file url target-dir), open an
;; output stream on it with with-open, and write the byte array returned by
;; imgurl->bytes to that stream. The pre-commit text passes `url` directly to
;; imgurl->bytes; the post-commit text first binds encoded-url to
;; (encode-url url) and passes that instead. The file name is still derived
;; from the unencoded `url` in both versions.
(defn scrape-book-cover [url target-dir] (defn scrape-book-cover [url target-dir]
(let [target-file (url->file url target-dir)] (let [target-file (url->file url target-dir)
encoded-url (encode-url url)]
(with-open [out (io/output-stream target-file)] (with-open [out (io/output-stream target-file)]
(.write out (imgurl->bytes url))))) (.write out (imgurl->bytes encoded-url)))))
(defn find-missing-covers [books-url target-dir] (defn find-missing-covers [books-url target-dir]
(remove #(fs/exists? (url->file % target-dir)) (remove #(fs/exists? (url->file % target-dir))