From 2cb420a98d3255d04a3cbbc502d33dafc525e328 Mon Sep 17 00:00:00 2001
From: Aaron Mueller
Date: Tue, 10 Jun 2014 23:09:36 +0200
Subject: [PATCH] Fix broken images with umlauts in the author or the book title field

---
 src/buchdesmonats/core.clj | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

Review notes (this region, up to the first "diff --git" line, is not part
of the commit message and is skipped when the patch is applied):

* encode-url-part encodes a single URL path segment as UTF-8 by calling
  java.net.URLEncoder/encode.
* encode-url pulls the two path segments that follow "/autor/" out of the
  given URL (author, then book title), encodes each one, and rebuilds the
  URL on the fixed prefix "http://lovelybooks.de/autor/" with no trailing
  slash.
* scrape-book-cover keeps deriving the target file name from the original
  (unencoded) URL; only the download goes through the encoded URL.
* The url->file hunk changes indentation only.
* NOTE(review): if the URL has no "/autor/<segment>/<segment>" part,
  re-find returns nil, parts is empty, and (nth parts 1) throws. Confirm
  that callers only pass URLs of that shape.
* NOTE(review): URLEncoder implements HTML form encoding, so a space is
  emitted as "+" rather than "%20", and a segment that is already
  percent-encoded would be encoded a second time. Confirm neither case
  occurs in the scraped URLs.
* NOTE(review): this copy of the patch was rebuilt from a paste in which
  all line breaks and runs of whitespace had been collapsed. Line
  boundaries follow the hunk headers and the diffstat; leading indentation
  of context, removed and added lines is a best-effort reconstruction. If
  the context does not match the target file, apply with
  --ignore-whitespace.

diff --git a/src/buchdesmonats/core.clj b/src/buchdesmonats/core.clj
index 01e84c2..beb248f 100644
--- a/src/buchdesmonats/core.clj
+++ b/src/buchdesmonats/core.clj
@@ -35,12 +35,21 @@
       (http-client/get {:as :byte-array})
       :body))
 
+(defn encode-url-part [part]
+  (java.net.URLEncoder/encode part "UTF-8"))
+
+(defn encode-url [url]
+  (let [parts (map encode-url-part (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url))
+        author (nth parts 1)
+        book-title (nth parts 2)]
+    (str "http://lovelybooks.de/autor/" author "/" book-title)))
+
 (defn url->file [lovelybooks-url target-dir]
   (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
     (-> (str author "_" title ".jpg")
-        str/lower-case
-        (str/replace #"[^a-z0-9-_.]" "")
-        (#(io/file target-dir %)))))
+      str/lower-case
+      (str/replace #"[^a-z0-9-_.]" "")
+      (#(io/file target-dir %)))))
 
 (defn scrape-book-urls [github-url]
   (->> (http-client/get github-url)
@@ -50,9 +59,10 @@
        (remove nil?)))
 
 (defn scrape-book-cover [url target-dir]
-  (let [target-file (url->file url target-dir)]
+  (let [target-file (url->file url target-dir)
+        encoded-url (encode-url url)]
     (with-open [out (io/output-stream target-file)]
-      (.write out (imgurl->bytes url)))))
+      (.write out (imgurl->bytes encoded-url)))))
 
 (defn find-missing-covers [books-url target-dir]
   (remove #(fs/exists? (url->file % target-dir))