From f465bf6c0eb18e32525526f31b57d62b98813804 Mon Sep 17 00:00:00 2001
From: Aaron Mueller <mail@aaron-mueller.de>
Date: Sun, 28 Sep 2014 00:09:23 +0200
Subject: [PATCH] Working on the content grabbing mechanism :)

---
 project.clj                  |  2 ++
 src/luduverse/core.clj       |  9 +++------
 src/luduverse/handler.clj    |  7 +++++--
 src/luduverse/ld-images.clj  | 27 ++++++++++++++++++++++++++-
 src/luduverse/ld-scraper.clj | 29 ++++++++++++++++++-----------
 5 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/project.clj b/project.clj
index 3ffc635..9788f3c 100644
--- a/project.clj
+++ b/project.clj
@@ -8,6 +8,8 @@
                  [lib-noir "0.8.9"]
                  [compojure "1.1.9"]
                  [enlive "1.1.5"]
+                 [image-resizer "0.1.6"]
+                 [clj-http "1.0.0"]
                  [me.raynes/fs "1.4.6"]]
   :main luduverse.core
   :plugins [[lein-ring "0.8.10"]]
diff --git a/src/luduverse/core.clj b/src/luduverse/core.clj
index f7b5e9d..dbaa57b 100644
--- a/src/luduverse/core.clj
+++ b/src/luduverse/core.clj
@@ -1,8 +1,5 @@
-(ns luduverse.core)
-
-(defn foo
-  "I don't do a whole lot."
-  [x]
-  (println x "Hello, World!"))
+(ns luduverse.core
+  (:require [luduverse.ld-scraper :as scraper])
+  (:gen-class))
 
 (def -main [])
diff --git a/src/luduverse/handler.clj b/src/luduverse/handler.clj
index 2ed613f..37a5046 100644
--- a/src/luduverse/handler.clj
+++ b/src/luduverse/handler.clj
@@ -2,14 +2,17 @@
   (:require [compojure.core :refer :all]
             [compojure.route :as route]
             [noir.util.middleware :as middleware]
-            [luduverse.pages :as pages]))
+            [luduverse.pages :as pages]
+            [luduverse.ld-images :as images]))
 
 (defroutes core-routes
   (route/resources "/")
   (GET "/" [] (pages/front-page))
   (route/not-found "404, sorry."))
 
-(defn init [])
+(defn init []
+  ; Create the image resource path if it is missing; java.io.File is used because no `fs` alias is required in this ns, and .mkdirs does nothing when the directory already exists
+  (.mkdirs (java.io.File. (str (images/base-path)))))
 
 (defn destroy [])
 
diff --git a/src/luduverse/ld-images.clj b/src/luduverse/ld-images.clj
index ab2e537..9dbf8ca 100644
--- a/src/luduverse/ld-images.clj
+++ b/src/luduverse/ld-images.clj
@@ -4,12 +4,22 @@
             [image-resizer.format :refer :all]
             [image-resizer.crop :refer :all]
             [clojure.java.io :as io]
-            [noir.io :as noir-io]))
+            [noir.io :as noir-io]
+            [clj-http.client :as http]
+            [me.raynes.fs :as fs]))
+
+;; NOTE: Parts of this file are taken from the open-source library
+;; https://github.com/arg-games/ldview
 
 (defn base-path
   ([] (str (noir-io/resource-path) "img"))
   ([competition-id] (str (base-path) "/ld" competition-id "/")))
 
+(defn create-file-structure [competition-id] ; creates the per-competition image folders; base-path already ends in "/"
+  (fs/mkdirs (str (base-path competition-id) "raw/")) ; save-images-for-entry stores the downloaded originals here
+  (fs/mkdirs (str (base-path competition-id) "thumbs/"))
+  (fs/mkdirs (str (base-path competition-id) "fullscreen/")))
+
 (defn image-name [competition-id folder entry-id number]
   (str (base-path competition-id) folder "/" entry-id "_" number ".png"))
 
@@ -32,3 +42,18 @@
 (defn sourceimage->fullscreen [image-path new-image-path]
   (with-redefs [image-resizer.fs/new-filename (fn [filepath dimensions] (str filepath))]
     (as-file (resize (io/file image-path) 800 600) new-image-path)))
+
+(defn save-image-from-url [url target-file] ; copies the streamed HTTP response body of `url` into `target-file`
+  (with-open [in (-> url (http/get {:as :stream}) :body)]
+    (io/copy in (io/file target-file))))
+
+(defn save-images-for-entry [competition-id entry]
+  ;; For every image URL of `entry` without a raw copy on disk: download it, then derive the fullscreen and thumb versions.
+  (doseq [image-url (:images entry)]
+    (let [id (:ld_uid entry) ; the id comes from the `entry` argument
+          number (last (first (re-seq #"shot([0-9]+)" image-url)))
+          raw-image-path (image-name competition-id "raw" id number)]
+      (when-not (fs/exists? raw-image-path)
+        (save-image-from-url image-url raw-image-path)
+        (sourceimage->fullscreen raw-image-path (image-name competition-id "fullscreen" id number))
+        (sourceimage->thumb raw-image-path (image-name competition-id "thumbs" id number))))))
diff --git a/src/luduverse/ld-scraper.clj b/src/luduverse/ld-scraper.clj
index 534dcab..59a3212 100644
--- a/src/luduverse/ld-scraper.clj
+++ b/src/luduverse/ld-scraper.clj
@@ -1,8 +1,7 @@
 (ns luduverse.ld-scraper
   (:require [net.cgrand.enlive-html :as html]
-            [clj-http.client :as http]
             [clojure.java.io :as io]
-            [clojure.string :refer [split]]))
+            [clojure.string :refer [split trim]]))
 
 ;; NOTE: This part is grabbed from the open source lib
 ;; https://github.com/arg-games/ldview
@@ -22,10 +21,6 @@
 (defn fetch-url [url]
   (html/html-resource (java.net.URL. url)))
 
-(defn save-image-from-url [url target-file]
-  (with-open [bodystream (:body (http/get url {:as :stream}))]
-    (io/copy bodystream (io/file target-file))))
-
 
 ; The actual scraping process. We crawl through the entire content results
 ; and fetch the relevant information from the DOM.
@@ -38,15 +33,22 @@
         links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))]
     (map #(last (split %1 #"=")) links)))
 
-(defn theme [competition-id]
-  (let [p (html/select (fetch-url (url-action competition-id "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
-    (first (:content (first p)))))
+(defn theme
+  "Returns the trimmed theme text for `competition-id`, scraped from the
+Ludum Dare Wikipedia article instead of the LD site, because the LD pages
+place the theme in too many different spots to cover with selectors.
+NOTE(review): the (+ 3 competition-id) offset presumably skips leading rows - confirm."
+  [competition-id]
+  (-> (fetch-url "http://en.wikipedia.org/wiki/Ludum_Dare")
+      (html/select [:table.wikitable (html/nth-child (+ 3 competition-id)) (html/nth-child 3)])
+      first :content first
+      trim))
 
 (defn links-on-entry [content]
-  (map (fn [x] {:title (first (:content x)) :url (:href (:attrs x))}) (html/select [content] [:p.links :> :a])))
+  (map (fn [x] {:title (first (:content x)) :url (:href (:attrs x))}) (html/select content [:p.links :> :a])))
 
 (defn images-on-entry [content]
-  (map #(:href (:attrs %1)) (html/select [content] [:table html/first-child :a])))
+  (map #(:href (:attrs %1)) (html/select content [:table html/first-child :a])))
 
 (defn format-entry-type [unformatted-type]
   (if (= unformatted-type "Jam Entry") "jam" "compo"))
@@ -65,3 +67,8 @@
      :type (format-entry-type unformatted-type)
      :links links
      :images images}))
+
+
+;; Save everything we get
+(defn save-entry "Here we save the stuff ..." [competition-id entry]
+  nil) ; stub: persistence is not implemented yet; the docstring now precedes the arg vector so it is real metadata