Fix the HTML cover scraping (LovelyBooks changed the HTML again)

2018-07-13 23:31:00 +02:00 · 2018-07-13 23:31:00 +02:00 · 695afe17db
commit 695afe17db
parent 641fcb8712
1 changed files with 12 additions and 9 deletions
--- a/src/buchdesmonats/core.clj
+++ b/src/buchdesmonats/core.clj
@ -27,13 +27,16 @@
            [me.raynes.fs :as fs]))

 (defn imgurl->bytes [lovelybooks-url]
-  (-> (java.net.URL. lovelybooks-url)
-      html/html-resource
-      (html/select [:div.bookcoverXXL :> :div :> :img])
-      first
-      (get-in [:attrs :src])
-      (http-client/get {:as :byte-array})
-      :body))
+  (let [urls (-> (java.net.URL. lovelybooks-url)
+                 html/html-resource
+                 (html/select [:div.cover-wrapper :> :div.cover :> :img.BookCover])
+                 first
+                 (get-in [:attrs :srcset])
+                 (str/split #" "))
+        url-to-fetch (last (filter #(re-matches #"http.+\.jpg" %) urls))
+        stream (http-client/get url-to-fetch {:as :byte-array})]
+    (:body stream)))
+

 (defn encode-url-part [part]
  (java.net.URLEncoder/encode part "UTF-8"))
@ -42,7 +45,7 @@
  (let [parts (map encode-url-part (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url))
        author (nth parts 1)
        book-title (nth parts 2)]
-       (str "https://lovelybooks.de/autor/" author "/" book-title)))
+    (str "https://lovelybooks.de/autor/" author "/" book-title)))

 (defn url->file [lovelybooks-url target-dir]
  (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]