diff --git a/src/ldview/models/competition.clj b/src/ldview/models/competition.clj index a08614b..7550e36 100644 --- a/src/ldview/models/competition.clj +++ b/src/ldview/models/competition.clj @@ -1,11 +1,10 @@ (ns ldview.models.competition (:use korma.core - [ldview.models.db] - [ldview.tasks.scrape :as configuration :only [competition]])) + [ldview.models.db])) (defn latest [] (first (select competitions - (where {:number configuration/competition}) ; FIXME: Make it dynamic + (order :number :DESC) (limit 1)))) (defn exists? [number] diff --git a/src/ldview/tasks/runner.clj b/src/ldview/tasks/runner.clj index 8a9072f..8cf7488 100644 --- a/src/ldview/tasks/runner.clj +++ b/src/ldview/tasks/runner.clj @@ -6,10 +6,6 @@ [ldview.models.competition :as competition] [ldview.models.schema :as schema])) -;; TODO: This is ugly and not the right way to do. This should be cleaned up -;; so the functions does not have side effects -(def competition (atom 0)) - (defn cleanup! [] (if (fs/exists? images/base-path) (fs/delete-dir images/base-path)) @@ -29,15 +25,14 @@ (images/sourceimage->thumb raw-image-path (images/image-name "thumbs" id number)))) (:images new-entry)))) -(defn fetch-all-content [] - (let [pages (range 1)] ;(scrape/number-of-pages)] - (for [page pages] - (for [ld-uid (scrape/entries-on-page (inc page))] +; TODO: Make it multithreaded +(defn fetch-all-content [competition-id] + (let [pages (range 5)] ;(scrape/number-of-pages competition-id)] + (doseq [page pages] + (doseq [ld-uid (scrape/entries-on-page competition-id (inc page))] (if-not (entry/exists? ld-uid) - (save-entry (scrape/entry-details ld-uid))))))) - -(defn load-competition [id] - (swap! competition id) - (competition/create! id (scrape/theme)) - (fetch-all-content)) + (save-entry! (scrape/entry-details competition-id ld-uid))))))) +(defn load-competition [competition-id] + (competition/create! competition-id (scrape/theme competition-id)) + (fetch-all-content competition-id)) diff --git a/src/ldview/tasks/scrape.clj b/src/ldview/tasks/scrape.clj index 5c7da4c..994093c 100644 --- a/src/ldview/tasks/scrape.clj +++ b/src/ldview/tasks/scrape.clj @@ -2,17 +2,16 @@ (:require [net.cgrand.enlive-html :as html] [clj-http.client :as http] [clojure.java.io :as io]) - (:use [clojure.string :only (split)] - [ldview.tasks.runner :only (@competition)])) + (:use [clojure.string :only (split)])) -(defn url-action [action] - (str "http://www.ludumdare.com/compo/ludum-dare-" competition "/?action=" action)) +(defn url-action [competition-id action] + (str "http://www.ludumdare.com/compo/ludum-dare-" competition-id "/?action=" action)) -(defn url-page [page] - (str (url-action "preview") "&start=" page)) +(defn url-page [competition-id page] + (str (url-action competition-id "preview") "&start=" page)) -(defn url-entry [entry-id] - (str (url-action "preview") "&uid=" entry-id)) +(defn url-entry [competition-id entry-id] + (str (url-action competition-id "preview") "&uid=" entry-id)) ; Helpers to fetch an html resource or save an url from the web into a ; local file. This thow functions will be needed to pull the content from @@ -27,17 +26,17 @@ ; The actual scraping process. We crawl through the entire content results ; and fetch the relevant information from the DOM. -(defn number-of-pages [] - (let [p (second (html/select (fetch-url (url-action "preview")) [:div#compo2 :> :p]))] +(defn number-of-pages [competition-id] + (let [p (second (html/select (fetch-url (url-action competition-id "preview")) [:div#compo2 :> :p]))] (read-string (html/text (last (butlast (html/select [p] [:a]))))))) -(defn entries-on-page [page] - (let [tds (html/select (fetch-url (url-page page)) #{[:.alt-1], [:.alt-2]}) +(defn entries-on-page [competition-id page] + (let [tds (html/select (fetch-url (url-page competition-id page)) #{[:.alt-1], [:.alt-2]}) links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))] (map #(last (split %1 #"=")) links))) -(defn theme [] - (let [p (html/select (fetch-url (url-action "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])] +(defn theme [competition-id] + (let [p (html/select (fetch-url (url-action competition-id "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])] (first (:content (first p))))) (defn links-on-entry [content] @@ -49,8 +48,8 @@ (defn format-entry-type [unformatted-type] (if (= unformatted-type "Jam Entry") "jam" "compo")) -(defn entry-details [entry-id] - (let [content (first (html/select (fetch-url (url-entry entry-id)) [:div#compo2])) +(defn entry-details [competition-id entry-id] + (let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2])) title-parts (first (html/texts (html/select [content] [:h3]))) [title author unformatted-type] (split title-parts #" - ") links (links-on-entry content)