(ns buchdesmonats.core
  (:require [net.cgrand.enlive-html :as html]
            [clj-http.client :as http-client]
            [clojure.string :as str]
            [clojure.java.io :as io]))

(defn- absolute-cover-url
  "Turns a protocol-relative image reference (\"//host/path\") into an
  http URL. Any other value is returned unchanged."
  [^String src]
  (if (.startsWith src "//")
    (str "http:" src)
    src))

(defn imgurl->bytes
  "Fetches the book page at `lovelybooks-url`, takes the `src` of the first
  `img` that is a child of a `div` that is a child of `div.bookcoverXXL`,
  downloads that image and returns its content as a byte array.

  Throws an ex-info (with the page URL in its data map) when the page has
  no such image, instead of issuing a request to a meaningless URL."
  [lovelybooks-url]
  (let [src (-> (java.net.URL. lovelybooks-url)
                html/html-resource
                (html/select [:div.bookcoverXXL :> :div :> :img])
                first
                (get-in [:attrs :src]))]
    (when-not src
      (throw (ex-info "No cover image found on book page"
                      {:url lovelybooks-url})))
    (:body (http-client/get (absolute-cover-url src) {:as :byte-array}))))

(defn url->filename
  "Derives a lower-cased `<author>_<title>.jpg` file name from a book URL
  whose tail has the shape `/autor/<author>/<title>-<digits>-<char>/`.

  When the URL does not have that shape both parts are nil and the result
  is \"_.jpg\"."
  [lovelybooks-url]
  (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$"
                                  lovelybooks-url)]
    (str/lower-case (str author "_" title ".jpg"))))

(defn scrape-book-urls
  "Downloads the text at `github-url` and returns a lazy seq of the link
  targets found on lines of the form `* ...[label](target)`. Lines that do
  not match are skipped."
  [github-url]
  (let [link-pattern #"^\* .*\[.+\]\((.+)\)"]
    (->> (http-client/get github-url)
         :body
         str/split-lines
         (map #(second (re-find link-pattern %)))
         (remove nil?))))

(defn scrape-book-cover
  "Downloads the cover image for the book page `url` and writes it to a
  file in the current directory named by `url->filename`. Returns nil."
  [url]
  ;; Download first: opening the output stream creates/truncates the file,
  ;; so doing it before a failing download would leave an empty .jpg behind
  ;; (or wipe a cover saved by an earlier run).
  (let [cover (imgurl->bytes url)]
    (with-open [out (io/output-stream (url->filename url))]
      (.write out ^bytes cover))))

(defn -main
  "Reads the book list from the CTHN wiki and saves the cover of every
  linked book, fetching covers in parallel. Returns true."
  [& args]
  (let [books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"]
    ;; dorun forces the lazy pmap for its side effects without retaining
    ;; the (all-nil) results.
    ;; NOTE(review): pmap runs on the future thread pool; when this is the
    ;; program entry point the JVM may linger after completion unless
    ;; (shutdown-agents) is called -- confirm whether that is wanted here.
    (dorun (pmap scrape-book-cover (scrape-book-urls books-url)))
    true))