diff --git a/src/ldview/tasks/runner.clj b/src/ldview/tasks/runner.clj
new file mode 100644
index 0000000..ad065d4
--- /dev/null
+++ b/src/ldview/tasks/runner.clj
@@ -0,0 +1,54 @@
+(ns ldview.tasks.runner
+  (:require [ldview.tasks.scrape :as scrape]
+            [ldview.tasks.images :as images]
+            [me.raynes.fs :as fs])
+  (:use [ldview.util]))
+
+(defn cleanup
+  "Resets the image output tree: deletes *image-base-path* if it exists, then
+  creates the thumbs/, fullscreen/ and raw/ directories beneath it."
+  []
+  ; `when` rather than a one-armed `if`: nothing to do if the directory is absent.
+  (when (fs/exists? *image-base-path*)
+    (fs/delete-dir *image-base-path*))
+  (fs/mkdirs (str *image-base-path* "/thumbs/"))
+  (fs/mkdirs (str *image-base-path* "/fullscreen/"))
+  (fs/mkdirs (str *image-base-path* "/raw/")))
+
+(defn save-entry
+  "Downloads every image listed under (:images entry) into the raw directory
+  and hands each downloaded file to images/sourceimage->fullscreen and
+  images/sourceimage->thumb. Does nothing when the entry has no images.
+  Called for its side effects; returns nil."
+  [entry]
+  ; TODO: Save to Database
+  ; doseq instead of map: map is lazy, so the downloads and conversions
+  ; would only run if a caller happened to realize the returned sequence.
+  (doseq [image-url (:images entry)]
+    ; The image number is the digit run following "shot" in the URL. URLs
+    ; without one are skipped rather than passed on with a nil number.
+    (when-let [number (second (re-find #"shot([0-9]+)" image-url))]
+      (let [raw-image-path (images/image-name "raw" number)]
+        (scrape/save-image-from-url image-url raw-image-path)
+        (images/sourceimage->fullscreen raw-image-path (images/image-name "fullscreen" number))
+        (images/sourceimage->thumb raw-image-path (images/image-name "thumbs" number))))))
+
+; TODO: Make it multithreaded
+(defn fetch-all-content
+  "Fetches each results page and saves every entry found on it. Called for
+  its side effects; returns nil."
+  []
+  (let [pages 1] ;(scrape/number-of-pages)]
+    ; Pages are assumed to be numbered from 1, matching the page number that
+    ; was previously hard-coded in the fetch call -- TODO confirm.
+    (doseq [page (range 1 (inc pages))
+            entry (scrape/fetch-page page)]
+      (save-entry entry))))
+
+
+(defn start
+  "Entry point of the task: resets the image directories, then fetches and
+  stores all content."
+  []
+  (cleanup)
+  (fetch-all-content))
+
diff --git a/src/ldview/tasks/scrape.clj b/src/ldview/tasks/scrape.clj
index ca24b60..b9a0aec 100644
--- a/src/ldview/tasks/scrape.clj
+++ b/src/ldview/tasks/scrape.clj
@@ -22,9 +22,9 @@
 (defn fetch-url [url]
   (html/html-resource (java.net.URL. url)))
 
-(defn save-image-from-url [url target-path new-name]
+(defn save-image-from-url [url target-file]
   (with-open [bodystream (:body (http/get url {:as :stream}))]
-    (io/copy bodystream (io/file (str target-path new-name)))))
+    (io/copy bodystream (io/file target-file))))
 
 ; The actual scraping process. 
We crawl through the entire content results @@ -51,16 +51,12 @@ links (links-on-entry content) description (html/text (nth (html/select [content] [:p]) 2)) images (images-on-entry content)] - {:title title + {:id entry-id + :title title :author author :description description :links links :images images})) -; Do the job -; TODO: Make it multithreaded and resumable. (defn fetch-page [page] (map entry-details (entries-on-page page))) - -(defn fetch-all [] - (map fetch-page (number-of-pages))) diff --git a/src/ldview/util.clj b/src/ldview/util.clj index 518cdff..cb40727 100644 --- a/src/ldview/util.clj +++ b/src/ldview/util.clj @@ -1,9 +1,7 @@ -(ns ldview.util - (:require [noir.io :as io] - [markdown.core :as md])) +(ns ldview.util) ; Global stuff that does not change over time and needed everywhere (def ^:dynamic *base-url* "http://www.ludumdare.com/compo/") -(def ^:dynamic *image-base-path* "content/images") +(def ^:dynamic *image-base-path* "resources/public/img/ld") (def ^:dynamic *competition* 27)