Fix the HTML cover scraping (lovelybooks changed the HTML again)
This commit is contained in:
parent
641fcb8712
commit
695afe17db
1 changed file with 12 additions and 9 deletions
|
@@ -27,13 +27,16 @@
|
||||||
[me.raynes.fs :as fs]))
|
[me.raynes.fs :as fs]))
|
||||||
|
|
||||||
(defn imgurl->bytes [lovelybooks-url]
|
(defn imgurl->bytes [lovelybooks-url]
|
||||||
(-> (java.net.URL. lovelybooks-url)
|
(let [urls (-> (java.net.URL. lovelybooks-url)
|
||||||
html/html-resource
|
html/html-resource
|
||||||
(html/select [:div.bookcoverXXL :> :div :> :img])
|
(html/select [:div.cover-wrapper :> :div.cover :> :img.BookCover])
|
||||||
first
|
first
|
||||||
(get-in [:attrs :src])
|
(get-in [:attrs :srcset])
|
||||||
(http-client/get {:as :byte-array})
|
(str/split #" "))
|
||||||
:body))
|
url-to-fetch (last (filter #(re-matches #"http.+\.jpg" %) urls))
|
||||||
|
stream (http-client/get url-to-fetch {:as :byte-array})]
|
||||||
|
(:body stream)))
|
||||||
|
|
||||||
|
|
||||||
(defn encode-url-part [part]
|
(defn encode-url-part [part]
|
||||||
(java.net.URLEncoder/encode part "UTF-8"))
|
(java.net.URLEncoder/encode part "UTF-8"))
|
||||||
|
@@ -42,7 +45,7 @@
|
||||||
(let [parts (map encode-url-part (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url))
|
(let [parts (map encode-url-part (re-find #"\/autor\/([^/]+)\/([^/]+)\/?" url))
|
||||||
author (nth parts 1)
|
author (nth parts 1)
|
||||||
book-title (nth parts 2)]
|
book-title (nth parts 2)]
|
||||||
(str "https://lovelybooks.de/autor/" author "/" book-title)))
|
(str "https://lovelybooks.de/autor/" author "/" book-title)))
|
||||||
|
|
||||||
(defn url->file [lovelybooks-url target-dir]
|
(defn url->file [lovelybooks-url target-dir]
|
||||||
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
|
(let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)]
|
||||||
|
@@ -87,7 +90,7 @@
|
||||||
(let [content (apply str (index-template book-urls))]
|
(let [content (apply str (index-template book-urls))]
|
||||||
(with-open [out (io/writer (io/file "public" "index.html"))]
|
(with-open [out (io/writer (io/file "public" "index.html"))]
|
||||||
(.write out content))))
|
(.write out content))))
|
||||||
|
|
||||||
(defn -main [& args]
|
(defn -main [& args]
|
||||||
(let [datasource-url "https://git.okoyono.de/mezzomix/buch_des_monats/raw/master/README.mkd"
|
(let [datasource-url "https://git.okoyono.de/mezzomix/buch_des_monats/raw/master/README.mkd"
|
||||||
target-dir (io/file "public" "book-covers")]
|
target-dir (io/file "public" "book-covers")]
|
||||||
|
|
Loading…
Reference in a new issue