Working on the content grabbing mechanism :)
This commit is contained in:
parent
dce6a86ec7
commit
f465bf6c0e
5 changed files with 54 additions and 20 deletions
|
@ -8,6 +8,8 @@
|
||||||
[lib-noir "0.8.9"]
|
[lib-noir "0.8.9"]
|
||||||
[compojure "1.1.9"]
|
[compojure "1.1.9"]
|
||||||
[enlive "1.1.5"]
|
[enlive "1.1.5"]
|
||||||
|
[image-resizer "0.1.6"]
|
||||||
|
[clj-http "1.0.0"]
|
||||||
[me.raynes/fs "1.4.6"]]
|
[me.raynes/fs "1.4.6"]]
|
||||||
:main luduverse.core
|
:main luduverse.core
|
||||||
:plugins [[lein-ring "0.8.10"]]
|
:plugins [[lein-ring "0.8.10"]]
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
(ns luduverse.core)
|
(ns luduverse.core
|
||||||
|
(:require [luduverse.ld-scraper :as scraper])
|
||||||
(defn foo
|
(:gen-class))
|
||||||
"I don't do a whole lot."
|
|
||||||
[x]
|
|
||||||
(println x "Hello, World!"))
|
|
||||||
|
|
||||||
(def -main [])
|
(defn -main
  "Application entry point used by `:gen-class` / `:main`.
  Accepts any number of command-line arguments and currently does nothing.
  Defined with `defn` (not `def`) so that it is a function taking the
  arguments the JVM launcher passes in."
  [& args])
|
||||||
|
|
|
@ -2,14 +2,17 @@
|
||||||
(:require [compojure.core :refer :all]
|
(:require [compojure.core :refer :all]
|
||||||
[compojure.route :as route]
|
[compojure.route :as route]
|
||||||
[noir.util.middleware :as middleware]
|
[noir.util.middleware :as middleware]
|
||||||
[luduverse.pages :as pages]))
|
[luduverse.pages :as pages]
|
||||||
|
[luduverse.ld-images :as images]))
|
||||||
|
|
||||||
(defroutes core-routes
|
(defroutes core-routes
|
||||||
(route/resources "/")
|
(route/resources "/")
|
||||||
(GET "/" [] (pages/front-page))
|
(GET "/" [] (pages/front-page))
|
||||||
(route/not-found "404, sorry."))
|
(route/not-found "404, sorry."))
|
||||||
|
|
||||||
(defn init [])
|
(defn init
  "Startup hook: creates the image resource directory returned by
  `images/base-path` when it does not exist yet.

  Uses java.io.File directly because this namespace declares no `fs` alias
  in its `ns` form, so `fs/exists?` / `fs/mkdirs` would not resolve here."
  []
  (let [image-dir (java.io.File. (str (images/base-path)))]
    ;; Create the image resource path (including parents) if it is missing.
    (when-not (.exists image-dir)
      (.mkdirs image-dir))))
|
||||||
|
|
||||||
(defn destroy [])
|
(defn destroy [])
|
||||||
|
|
||||||
|
|
|
@ -4,12 +4,22 @@
|
||||||
[image-resizer.format :refer :all]
|
[image-resizer.format :refer :all]
|
||||||
[image-resizer.crop :refer :all]
|
[image-resizer.crop :refer :all]
|
||||||
[clojure.java.io :as io]
|
[clojure.java.io :as io]
|
||||||
[noir.io :as noir-io]))
|
[noir.io :as noir-io]
|
||||||
|
[clj-http.client :as http]
|
||||||
|
[me.raynes.fs :as fs]))
|
||||||
|
|
||||||
|
;; NOTE: Parts of this file are taken from the open source library
|
||||||
|
;; https://github.com/arg-games/ldview
|
||||||
|
|
||||||
(defn base-path
|
(defn base-path
|
||||||
([] (str (noir-io/resource-path) "img"))
|
([] (str (noir-io/resource-path) "img"))
|
||||||
([competition-id] (str (base-path) "/ld" competition-id "/")))
|
([competition-id] (str (base-path) "/ld" competition-id "/")))
|
||||||
|
|
||||||
|
(defn create-file-structure
  "Creates the on-disk folders used for one competition's images.

  competition-id - identifier appended to the image base path by `base-path`.

  Creates the `raw`, `thumbs` and `fullscreen` folders below
  `(base-path competition-id)`. The `raw` folder is included because
  `save-images-for-entry` stores the downloaded originals there.
  Returns the result of the last `fs/mkdirs` call."
  [competition-id]
  ;; `base-path` already ends with a slash, so folder names are appended
  ;; without a leading one.
  (let [path (base-path competition-id)]
    (fs/mkdirs (str path "raw/"))
    (fs/mkdirs (str path "thumbs/"))
    (fs/mkdirs (str path "fullscreen/"))))
|
||||||
|
|
||||||
(defn image-name [competition-id folder entry-id number]
|
(defn image-name [competition-id folder entry-id number]
|
||||||
(str (base-path competition-id) folder "/" entry-id "_" number ".png"))
|
(str (base-path competition-id) folder "/" entry-id "_" number ".png"))
|
||||||
|
|
||||||
|
@ -32,3 +42,18 @@
|
||||||
(defn sourceimage->fullscreen [image-path new-image-path]
|
(defn sourceimage->fullscreen [image-path new-image-path]
|
||||||
(with-redefs [image-resizer.fs/new-filename (fn [filepath dimensions] (str filepath))]
|
(with-redefs [image-resizer.fs/new-filename (fn [filepath dimensions] (str filepath))]
|
||||||
(as-file (resize (io/file image-path) 800 600) new-image-path)))
|
(as-file (resize (io/file image-path) 800 600) new-image-path)))
|
||||||
|
|
||||||
|
(defn save-image-from-url
  "Fetches `url` over HTTP as a stream and copies the response body into the
  file at `target-file`. The body stream is closed once the copy finishes."
  [url target-file]
  (let [destination (io/file target-file)]
    (with-open [in (:body (http/get url {:as :stream}))]
      (io/copy in destination))))
|
||||||
|
|
||||||
|
(defn save-images-for-entry
  "Downloads every image listed under `:images` in `entry` and derives the
  fullscreen and thumbnail variants from each download.

  competition-id - competition identifier, passed through to `image-name`.
  entry          - map read for `:ld_uid` (used in the file names) and
                   `:images` (a seq of image URLs).

  An image whose raw file already exists on disk is skipped. Returns nil."
  [competition-id entry]
  ;; The id must come from the `entry` argument; the previous code read it
  ;; from `new-entry`, a symbol that is not bound anywhere in this function.
  (let [id (:ld_uid entry)]
    (doseq [image-url (:images entry)]
      (let [;; Digits following "shot" in the URL; nil when the URL has none.
            number (last (re-find #"shot([0-9]+)" image-url))
            raw-image-path (image-name competition-id "raw" id number)]
        (when-not (fs/exists? raw-image-path)
          (save-image-from-url image-url raw-image-path)
          (sourceimage->fullscreen raw-image-path (image-name competition-id "fullscreen" id number))
          (sourceimage->thumb raw-image-path (image-name competition-id "thumbs" id number)))))))
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
(ns luduverse.ld-scraper
|
(ns luduverse.ld-scraper
|
||||||
(:require [net.cgrand.enlive-html :as html]
|
(:require [net.cgrand.enlive-html :as html]
|
||||||
[clj-http.client :as http]
|
|
||||||
[clojure.java.io :as io]
|
[clojure.java.io :as io]
|
||||||
[clojure.string :refer [split]]))
|
[clojure.string :refer [split trim]]))
|
||||||
|
|
||||||
;; NOTE: This part is grabbed from the open source lib
|
;; NOTE: This part is grabbed from the open source lib
|
||||||
;; https://github.com/arg-games/ldview
|
;; https://github.com/arg-games/ldview
|
||||||
|
@ -22,10 +21,6 @@
|
||||||
(defn fetch-url [url]
|
(defn fetch-url [url]
|
||||||
(html/html-resource (java.net.URL. url)))
|
(html/html-resource (java.net.URL. url)))
|
||||||
|
|
||||||
(defn save-image-from-url [url target-file]
|
|
||||||
(with-open [bodystream (:body (http/get url {:as :stream}))]
|
|
||||||
(io/copy bodystream (io/file target-file))))
|
|
||||||
|
|
||||||
|
|
||||||
; The actual scraping process. We crawl through the entire content results
|
; The actual scraping process. We crawl through the entire content results
|
||||||
; and fetch the relevant information from the DOM.
|
; and fetch the relevant information from the DOM.
|
||||||
|
@ -38,15 +33,22 @@
|
||||||
links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))]
|
links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))]
|
||||||
(map #(last (split %1 #"=")) links)))
|
(map #(last (split %1 #"=")) links)))
|
||||||
|
|
||||||
(defn theme [competition-id]
|
(defn theme
|
||||||
(let [p (html/select (fetch-url (url-action competition-id "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
|
"We pull the themes for the competition from wikipedia because the LD page
|
||||||
(first (:content (first p)))))
|
puts the theme in random places and I don't want to build up 25 different
|
||||||
|
selectors for 28 different places. This simply does not work, so I give up
|
||||||
|
and simply fetch it from Wikipedia. End of discussion :)"
|
||||||
|
[competition-id]
|
||||||
|
(-> (fetch-url "http://en.wikipedia.org/wiki/Ludum_Dare")
|
||||||
|
(html/select [:table.wikitable (html/nth-child (+ 3 competition-id)) (html/nth-child 3)])
|
||||||
|
first :content first
|
||||||
|
trim))
|
||||||
|
|
||||||
(defn links-on-entry [content]
|
(defn links-on-entry [content]
|
||||||
(map (fn [x] {:title (first (:content x)) :url (:href (:attrs x))}) (html/select [content] [:p.links :> :a])))
|
(map (fn [x] {:title (first (:content x)) :url (:href (:attrs x))}) (html/select content [:p.links :> :a])))
|
||||||
|
|
||||||
(defn images-on-entry [content]
|
(defn images-on-entry [content]
|
||||||
(map #(:href (:attrs %1)) (html/select [content] [:table html/first-child :a])))
|
(map #(:href (:attrs %1)) (html/select content [:table html/first-child :a])))
|
||||||
|
|
||||||
(defn format-entry-type [unformatted-type]
|
(defn format-entry-type [unformatted-type]
|
||||||
(if (= unformatted-type "Jam Entry") "jam" "compo"))
|
(if (= unformatted-type "Jam Entry") "jam" "compo"))
|
||||||
|
@ -65,3 +67,8 @@
|
||||||
:type (format-entry-type unformatted-type)
|
:type (format-entry-type unformatted-type)
|
||||||
:links links
|
:links links
|
||||||
:images images}))
|
:images images}))
|
||||||
|
|
||||||
|
|
||||||
|
;; Save everything we get
|
||||||
|
(defn save-entry [competition-id entry]
|
||||||
|
"Here we save the stuff ...")
|
||||||
|
|
Loading…
Reference in a new issue