Jeden Monat ein neuer Buchtipp. http://buchdesmonats.okoyono.de
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

core.clj 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. ;;; Copyright (C) 2014-2019 Aaron Fischer <mail@aaron-fischer.net>
  2. ;;;
  3. ;;; Permission is hereby granted, free of charge, to any person obtaining a copy of
  4. ;;; this software and associated documentation files (the "Software"), to deal in
  5. ;;; the Software without restriction, including without limitation the rights to
  6. ;;; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  7. ;;; the Software, and to permit persons to whom the Software is furnished to do so,
  8. ;;; subject to the following conditions:
  9. ;;;
  10. ;;; The above copyright notice and this permission notice shall be included in all
  11. ;;; copies or substantial portions of the Software.
  12. ;;;
  13. ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. ;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
  15. ;;; FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
  16. ;;; COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  17. ;;; IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  18. ;;; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  19. (ns buchdesmonats.core
  20. (:gen-class)
  21. (:require [net.cgrand.enlive-html :as html]
  22. [clj-http.client :as http-client]
  23. [clojure.string :as str]
  24. [clojure.java.io :as io]
  25. [clojure.tools.logging :as log]
  26. [me.raynes.fs :as fs]))
  (defn imgurl->bytes
    "Fetches the cover image shown on the book page at `lovelybooks-url` and
    returns the body of the image response as a byte array.

    The page is parsed with enlive. The `srcset` attribute of the first
    `img.ResponsiveImage.BookCover` element is split on single spaces, and the
    last entry that starts with \"http\" and ends in \".jpg\" is downloaded.

    Nothing is guarded here: if the page has no such image (or no matching
    srcset entry) the resulting exception propagates to the caller."
    [lovelybooks-url]
    (let [page         (html/html-resource (java.net.URL. lovelybooks-url))
          cover-img    (first (html/select page [:img.ResponsiveImage.BookCover]))
          srcset       (get-in cover-img [:attrs :srcset])
          candidates   (str/split srcset #" ")
          jpg-url?     (fn [candidate] (re-matches #"http.+\.jpg" candidate))
          ;; The last matching srcset entry is the one that gets downloaded.
          url-to-fetch (last (filter jpg-url? candidates))
          response     (http-client/get url-to-fetch {:as :byte-array})]
      (:body response)))
  (defn encode-url-part
    "Percent-encodes `part` as UTF-8 so it can be used as a single URL path
    segment, and returns the encoded string.

    java.net.URLEncoder implements HTML form encoding, which writes a space as
    \"+\". Inside a URL path a \"+\" is a literal plus sign, so every \"+\" in the
    encoder output is rewritten to \"%20\". This is lossless: a literal plus in
    the input has already been encoded as \"%2B\" at that point."
    [part]
    (-> (java.net.URLEncoder/encode part "UTF-8")
        (str/replace "+" "%20")))
  (defn encode-url
    "Rebuilds a book URL on https://lovelybooks.de from `url`, with the author
    and book-title path segments passed through `encode-url-part`.

    The author is the path segment directly after \"/autor/\" and the book title
    is the segment after that; a trailing slash is optional and is not kept in
    the result. When `url` contains no such pair of segments, the `nth` lookup
    on the empty match throws."
    [url]
    (let [encoded (->> url
                       (re-find #"\/autor\/([^/]+)\/([^/]+)\/?")
                       (map encode-url-part))]
      ;; Index 0 is the whole match; 1 and 2 are the author and title groups.
      (str "https://lovelybooks.de/autor/" (nth encoded 1) "/" (nth encoded 2))))
  (defn url->file
    "Maps a lovelybooks book URL to the java.io.File inside `target-dir` under
    which its cover image is stored.

    The file name is \"<author>_<title>.jpg\", lower-cased, with every character
    other than a-z, 0-9, \"-\", \"_\" and \".\" removed. Author and title are taken
    from a URL shaped like \".../autor/<author>/<title>-<digits>-<char>/\".

    The trailing slash is optional, matching what `encode-url` accepts;
    previously a URL without it did not match and silently produced the shared
    name \"_.jpg\". A URL that does not match at all still yields \"_.jpg\"."
    [lovelybooks-url target-dir]
    (let [[_ author title] (re-find #".\/autor\/([^\/]+)\/(.+)-[0-9]+-.\/?$"
                                    lovelybooks-url)
          file-name (-> (str author "_" title ".jpg")
                        str/lower-case
                        (str/replace #"[^a-z0-9-_.]" ""))]
      (io/file target-dir file-name)))
  (defn scrape-book-urls
    "Downloads the document at `datasource-url` and returns a lazy sequence of
    the link targets found in its bullet lines.

    A line contributes a URL when it starts with \"* \" and contains a
    Markdown-style link \"[text](target)\"; all other lines are dropped.

    NOTE(review): the request is made with `:insecure? true`, which appears to
    switch off TLS certificate checks in clj-http - confirm this is intended."
    [datasource-url]
    (let [response    (http-client/get datasource-url {:insecure? true})
          lines       (str/split-lines (:body response))
          link-target (fn [line]
                        (second (re-find #"^\* .*\[.+\]\((.+)\)" line)))]
      ;; keep drops the nil results of lines that are not bullet links.
      (keep link-target lines)))
  (defn scrape-book-cover
    "Downloads the cover image for the book at `url` and writes it to the file
    that `url->file` assigns to it inside `target-dir`.

    Best effort: any exception raised while resolving, downloading or writing
    is caught and logged together with its cause, and the book is skipped.
    Returns nil in both cases."
    [url target-dir]
    (try
      (let [target-file (url->file url target-dir)
            ;; The image is fetched before the output stream is opened, so a
            ;; failed download does not leave an empty file behind.
            cover-bytes (imgurl->bytes (encode-url url))]
        (with-open [out (io/output-stream target-file)]
          (.write out cover-bytes)))
      (catch Exception e
        ;; Passing the exception first lets the logger record the cause
        ;; instead of silently discarding it.
        (log/info e "Problems with " url ", skip it."))))
  (defn find-missing-covers
    "Returns a lazy sequence of the book URLs listed at `books-url` whose cover
    file (as computed by `url->file`) does not exist in `target-dir` yet."
    [books-url target-dir]
    (let [downloaded? (fn [book-url]
                        (fs/exists? (url->file book-url target-dir)))]
      (->> (scrape-book-urls books-url)
           (remove downloaded?))))
  ;; Enlive snippet for one cover entry, taken from the direct :div children of
  ;; div#covers in buchdesmonats/layout.html.
  ;;   link  - book URL; becomes the href of the anchor and is also mapped
  ;;           through url->file (under "book-covers") for the image src.
  ;;   title - value for the title attribute of the image.
  (html/defsnippet cover-item-model "buchdesmonats/layout.html" [:div#covers :> :div]
    [link title]
    [:a]   (html/set-attr :href link)
    [:img] (let [cover-path (url->file link "book-covers")]
             (html/set-attr :src cover-path :title title)))
  ;; Enlive template for the whole index page, based on
  ;; buchdesmonats/layout.html. The content of the #covers element is replaced
  ;; with one cover-item-model entry per URL in cover-urls; each entry gets
  ;; the fixed image title "zu Lovely Books".
  (html/deftemplate index-template "buchdesmonats/layout.html"
    [cover-urls]
    [:#covers] (html/content
                (for [cover-url cover-urls]
                  (cover-item-model cover-url "zu Lovely Books"))))
  (defn generate-html
    "Renders the index page for `book-urls` with `index-template` and writes it
    to \"index.html\" inside `target-dir`. Returns nil.

    `target-dir` used to be ignored in favour of a hard-coded \"public\"
    directory; it is now honoured. The directory must already exist."
    [book-urls target-dir]
    (let [content (apply str (index-template book-urls))]
      ;; spit opens a writer on the file, writes the content and closes it.
      (spit (io/file target-dir "index.html") content)))
  (defn -main
    "Program entry point; `args` are not used.

    Creates public/book-covers, writes the index page for every book listed in
    the README datasource, downloads every cover that is not on disk yet (in
    parallel via pmap) and then exits the JVM with status 0."
    [& args]
    (let [readme-url     "https://git.okoyono.de/mezzomix/buch_des_monats/raw/master/README.mkd"
          covers-dir     (io/file "public" "book-covers")
          download-cover (fn [book-url] (scrape-book-cover book-url covers-dir))]
      (fs/mkdirs covers-dir)
      (generate-html (scrape-book-urls readme-url) "public")
      ;; pmap is lazy; dorun forces every download and discards the results.
      (dorun (pmap download-cover
                   (find-missing-covers readme-url covers-dir)))
      ;; NOTE(review): presumably an explicit exit is used so the threads
      ;; started by pmap do not keep the JVM alive - confirm.
      (System/exit 0)))