Initial commit
This commit is contained in:
commit
003401b5f1
2 changed files with 48 additions and 0 deletions
10
project.clj
Normal file
10
project.clj
Normal file
|
@@ -0,0 +1,10 @@
|
||||||
|
;; Leiningen project definition for the buchdesmonats tool.
(defproject buchdesmonats "1.0"
  ;; Project metadata.
  :description "A simple tool to fetch all books of the months from the cthn.de project"
  :url "http://cthn.de/projects/buch_des_monats"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}

  ;; Namespace used as the program entry point.
  :main buchdesmonats.core

  ;; Runtime dependencies, one [artifact version] pair per line.
  :dependencies [[org.clojure/clojure "1.5.1"]
                 [enlive "1.1.5"]
                 [me.raynes/fs "1.4.4"]
                 [clj-http "0.9.1"]])
|
38
src/buchdesmonats/core.clj
Normal file
38
src/buchdesmonats/core.clj
Normal file
|
@@ -0,0 +1,38 @@
|
||||||
|
(ns buchdesmonats.core
|
||||||
|
(:require [net.cgrand.enlive-html :as html]
|
||||||
|
[clj-http.client :as http-client]
|
||||||
|
[clojure.string :as str]
|
||||||
|
[clojure.java.io :as io]))
|
||||||
|
|
||||||
|
(defn imgurl->bytes
  "Fetches the book page at `lovelybooks-url`, locates the first <img> that is
  a direct child of a <div> directly inside `div.bookcoverXXL`, and downloads
  that image.

  The image's `src` attribute is always prefixed with \"http:\" before the
  request is made. Returns the `:body` of the HTTP response, requested as a
  byte array."
  [lovelybooks-url]
  (let [page      (html/html-resource (java.net.URL. lovelybooks-url))
        cover-img (first (html/select page [:div.bookcoverXXL :> :div :> :img]))
        ;; The scheme is prepended unconditionally, whatever `src` contains.
        cover-url (str "http:" (get-in cover-img [:attrs :src]))
        response  (http-client/get cover-url {:as :byte-array})]
    (:body response)))
|
||||||
|
|
||||||
|
(defn url->filename
  "Derives a lower-cased \"<author>_<title>.jpg\" file name from a book URL.

  The author is the path segment after \"/autor/\"; the title is the part of
  the following segment that precedes the trailing \"-<digits>-<char>/\".
  When the URL does not match this shape, both parts are nil and the result
  is \"_.jpg\"."
  [lovelybooks-url]
  (let [match  (re-find #".\/autor\/([^\/]+)\/(.+)-([0-9]+)-.\/$" lovelybooks-url)
        ;; Group 1 is the author, group 2 the title; the numeric id in
        ;; group 3 is not used.
        author (nth match 1 nil)
        title  (nth match 2 nil)]
    (-> (str author "_" title ".jpg")
        str/lower-case)))
|
||||||
|
|
||||||
|
|
||||||
|
(defn scrape-book-urls
  "Downloads the document at `github-url` and returns a lazy sequence of the
  link targets found on its bullet lines.

  A line contributes a URL when it starts with \"* \" and contains a
  Markdown-style `[text](target)` link; lines that do not match are dropped."
  [github-url]
  (let [markdown    (:body (http-client/get github-url))
        ;; Returns the captured link target for a line, or nil if it has none.
        link-target (fn [line]
                      (second (re-find #"^\* .*\[.+\]\((.+)\)" line)))]
    (keep link-target (str/split-lines markdown))))
|
||||||
|
|
||||||
|
(defn scrape-book-cover
  "Downloads the cover image for the book page at `url` and writes it to the
  file named by `url->filename`.

  The image is fetched completely before the output file is opened, so a
  failed download neither leaves an empty file behind nor truncates a cover
  saved by an earlier run. Returns nil."
  [url]
  ;; Download first: opening the output stream creates/truncates the file.
  (let [cover-bytes (imgurl->bytes url)]
    (with-open [out (io/output-stream (url->filename url))]
      (.write out cover-bytes))))
|
||||||
|
|
||||||
|
|
||||||
|
(defn -main
  "Program entry point: reads the list of book URLs from the CTHN wiki page
  and downloads every cover in parallel. Command-line `args` are ignored.

  Shuts down the agent thread pool when done, so this is meant to be run as
  the process entry point rather than called repeatedly from a REPL.
  Returns true."
  [& args]
  (let [books-url "https://raw.github.com/CTHN/wiki-data/master/pages/projects/buch_des_monats.mkd"]
    (try
      ;; doall forces the lazy pmap so every download has finished here.
      (doall (pmap scrape-book-cover
                   (scrape-book-urls books-url)))
      (finally
        ;; pmap runs on futures backed by the agent thread pool; without
        ;; this its non-daemon threads keep the JVM alive after the work
        ;; is done (or has failed).
        (shutdown-agents))))
  true)
|
Loading…
Reference in a new issue