From f465bf6c0eb18e32525526f31b57d62b98813804 Mon Sep 17 00:00:00 2001
From: Aaron Mueller
Date: Sun, 28 Sep 2014 00:09:23 +0200
Subject: [PATCH] Working on the content grabbing mechanism :)

Move the image download helper from luduverse.ld-scraper into
luduverse.ld-images and add helpers that store the raw, fullscreen and
thumbnail variant of every screenshot of an entry. The competition theme
is now read from the Ludum Dare table on Wikipedia, and handler/init
creates the image resource folder when it is missing.

---
Review notes (this section is commentary and not part of the commit):

 * save-images-for-entry read the entry id from the unbound symbol
   `new-entry`; it now uses its `entry` argument.
 * handler.clj called fs/exists? and fs/mkdirs without requiring
   me.raynes.fs; the require was added (hunk is now +2,18).
 * create-file-structure did not create the "raw" folder that
   save-images-for-entry writes to first; it now creates all three.
 * Not touched: the context line `(def -main [])` in core.clj binds -main
   to an empty vector instead of defining a function.
 * Indentation was reconstructed and the blob ids in the `index` lines
   predate the fixes above.

 project.clj                  |  2 ++
 src/luduverse/core.clj       |  9 +++------
 src/luduverse/handler.clj    |  8 ++++++--
 src/luduverse/ld-images.clj  | 27 ++++++++++++++++++++++++++-
 src/luduverse/ld-scraper.clj | 29 ++++++++++++++++++-----------
 5 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/project.clj b/project.clj
index 3ffc635..9788f3c 100644
--- a/project.clj
+++ b/project.clj
@@ -8,6 +8,8 @@
                  [lib-noir "0.8.9"]
                  [compojure "1.1.9"]
                  [enlive "1.1.5"]
+                 [image-resizer "0.1.6"]
+                 [clj-http "1.0.0"]
                  [me.raynes/fs "1.4.6"]]
   :main luduverse.core
   :plugins [[lein-ring "0.8.10"]]
diff --git a/src/luduverse/core.clj b/src/luduverse/core.clj
index f7b5e9d..dbaa57b 100644
--- a/src/luduverse/core.clj
+++ b/src/luduverse/core.clj
@@ -1,8 +1,5 @@
-(ns luduverse.core)
-
-(defn foo
-  "I don't do a whole lot."
-  [x]
-  (println x "Hello, World!"))
+(ns luduverse.core
+  (:require [luduverse.ld-scraper :as scraper])
+  (:gen-class))
 
 (def -main [])
diff --git a/src/luduverse/handler.clj b/src/luduverse/handler.clj
index 2ed613f..37a5046 100644
--- a/src/luduverse/handler.clj
+++ b/src/luduverse/handler.clj
@@ -2,14 +2,18 @@
   (:require [compojure.core :refer :all]
             [compojure.route :as route]
             [noir.util.middleware :as middleware]
-            [luduverse.pages :as pages]))
+            [luduverse.pages :as pages]
+            [luduverse.ld-images :as images]
+            [me.raynes.fs :as fs]))
 
 (defroutes core-routes
   (route/resources "/")
   (GET "/" [] (pages/front-page))
   (route/not-found "404, sorry."))
 
-(defn init [])
+(defn init []
+  ; Create the image resource path if it is missing
+  (if (not (fs/exists? (images/base-path))) (fs/mkdirs (images/base-path))))
 
 (defn destroy [])
 
diff --git a/src/luduverse/ld-images.clj b/src/luduverse/ld-images.clj
index ab2e537..9dbf8ca 100644
--- a/src/luduverse/ld-images.clj
+++ b/src/luduverse/ld-images.clj
@@ -4,12 +4,22 @@
             [image-resizer.format :refer :all]
             [image-resizer.crop :refer :all]
             [clojure.java.io :as io]
-            [noir.io :as noir-io]))
+            [noir.io :as noir-io]
+            [clj-http.client :as http]
+            [me.raynes.fs :as fs]))
+
+;; NOTE: Parts of this file are taken from the open source lib
+;; https://github.com/arg-games/ldview
 
 (defn base-path
   ([] (str (noir-io/resource-path) "img"))
   ([competition-id] (str (base-path) "/ld" competition-id "/")))
 
+(defn create-file-structure [competition-id]
+  (let [path (base-path competition-id)] ; already ends with "/"
+    (doseq [folder ["raw" "thumbs" "fullscreen"]] ; every folder image-name is called with
+      (fs/mkdirs (str path folder)))))
+
 (defn image-name [competition-id folder entry-id number]
   (str (base-path competition-id) folder "/" entry-id "_" number ".png"))
 
@@ -32,3 +42,18 @@
 (defn sourceimage->fullscreen [image-path new-image-path]
   (with-redefs [image-resizer.fs/new-filename (fn [filepath dimensions] (str filepath))]
     (as-file (resize (io/file image-path) 800 600) new-image-path)))
+
+(defn save-image-from-url [url target-file]
+  (with-open [bodystream (:body (http/get url {:as :stream}))]
+    (io/copy bodystream (io/file target-file))))
+
+(defn save-images-for-entry [competition-id entry]
+  (doseq [image-url (:images entry)]
+    (let [id (:ld_uid entry) ; read from the entry argument; `new-entry` was never bound
+          number (last (first (re-seq #"shot([0-9]+)" image-url)))
+          raw-image-path (image-name competition-id "raw" id number)]
+      (if-not (fs/exists? raw-image-path) ; skip images that were already downloaded
+        (do
+          (save-image-from-url image-url raw-image-path)
+          (sourceimage->fullscreen raw-image-path (image-name competition-id "fullscreen" id number))
+          (sourceimage->thumb raw-image-path (image-name competition-id "thumbs" id number)))))))
diff --git a/src/luduverse/ld-scraper.clj b/src/luduverse/ld-scraper.clj
index 534dcab..59a3212 100644
--- a/src/luduverse/ld-scraper.clj
+++ b/src/luduverse/ld-scraper.clj
@@ -1,8 +1,7 @@
 (ns luduverse.ld-scraper
   (:require [net.cgrand.enlive-html :as html]
-            [clj-http.client :as http]
             [clojure.java.io :as io]
-            [clojure.string :refer [split]]))
+            [clojure.string :refer [split trim]]))
 
 ;; NOTE: This part is grabbed from the open source lib
 ;; https://github.com/arg-games/ldview
@@ -22,10 +21,6 @@
 (defn fetch-url [url]
   (html/html-resource (java.net.URL. url)))
 
-(defn save-image-from-url [url target-file]
-  (with-open [bodystream (:body (http/get url {:as :stream}))]
-    (io/copy bodystream (io/file target-file))))
-
 ; The actual scraping process. We crawl through the entire content results
 ; and fetch the relevant information from the DOM.
 
@@ -38,15 +33,22 @@
         links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))]
     (map #(last (split %1 #"=")) links)))
 
-(defn theme [competition-id]
-  (let [p (html/select (fetch-url (url-action competition-id "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
-    (first (:content (first p)))))
+(defn theme
+  "We pull the theme for the competition from Wikipedia because the LD page
+places the theme in random places and I don't want to build up 25 different
+selectors for 28 different places. This simply does not work, so I give up
+and simply fetch it from Wikipedia. End of discussion :)"
+  [competition-id]
+  (-> (fetch-url "http://en.wikipedia.org/wiki/Ludum_Dare")
+      (html/select [:table.wikitable (html/nth-child (+ 3 competition-id)) (html/nth-child 3)])
+      first :content first
+      trim))
 
 (defn links-on-entry [content]
-  (map (fn [x] {:title (first (:content x)) :url (:href (:attrs x))}) (html/select [content] [:p.links :> :a])))
+  (map (fn [x] {:title (first (:content x)) :url (:href (:attrs x))}) (html/select content [:p.links :> :a])))
 
 (defn images-on-entry [content]
-  (map #(:href (:attrs %1)) (html/select [content] [:table html/first-child :a])))
+  (map #(:href (:attrs %1)) (html/select content [:table html/first-child :a])))
 
 (defn format-entry-type [unformatted-type]
   (if (= unformatted-type "Jam Entry") "jam" "compo"))
@@ -65,3 +67,8 @@
      :type (format-entry-type unformatted-type)
      :links links
      :images images}))
+
+
+;; Save everything we get
+(defn save-entry [competition-id entry]
+  "Here we save the stuff ...")