fix the nasty competition-id issue and make it run again

This commit is contained in:
Aaron Mueller 2014-01-30 21:02:25 +01:00
parent 03ec7844e0
commit 8066186397
3 changed files with 26 additions and 33 deletions

View file

@ -1,11 +1,10 @@
(ns ldview.models.competition (ns ldview.models.competition
(:use korma.core (:use korma.core
[ldview.models.db] [ldview.models.db]))
[ldview.tasks.scrape :as configuration :only [competition]]))
(defn latest [] (defn latest []
(first (select competitions (first (select competitions
(where {:number configuration/competition}) ; FIXME: Make it dynamic (order :number :DESC)
(limit 1)))) (limit 1))))
(defn exists? [number] (defn exists? [number]

View file

@ -6,10 +6,6 @@
[ldview.models.competition :as competition] [ldview.models.competition :as competition]
[ldview.models.schema :as schema])) [ldview.models.schema :as schema]))
;; TODO: This is ugly and not the right way to do. This should be cleaned up
;; so the functions does not have side effects
(def competition (atom 0))
(defn cleanup! [] (defn cleanup! []
(if (fs/exists? images/base-path) (if (fs/exists? images/base-path)
(fs/delete-dir images/base-path)) (fs/delete-dir images/base-path))
@ -29,15 +25,14 @@
(images/sourceimage->thumb raw-image-path (images/image-name "thumbs" id number)))) (images/sourceimage->thumb raw-image-path (images/image-name "thumbs" id number))))
(:images new-entry)))) (:images new-entry))))
(defn fetch-all-content [] ; TODO: Make it multithreaded
(let [pages (range 1)] ;(scrape/number-of-pages)] (defn fetch-all-content [competition-id]
(for [page pages] (let [pages (range 5)] ;(scrape/number-of-pages competition-id)]
(for [ld-uid (scrape/entries-on-page (inc page))] (doseq [page pages]
(doseq [ld-uid (scrape/entries-on-page competition-id (inc page))]
(if-not (entry/exists? ld-uid) (if-not (entry/exists? ld-uid)
(save-entry (scrape/entry-details ld-uid))))))) (save-entry! (scrape/entry-details competition-id ld-uid)))))))
(defn load-competition [id]
(swap! competition id)
(competition/create! id (scrape/theme))
(fetch-all-content))
(defn load-competition [competition-id]
(competition/create! competition-id (scrape/theme competition-id))
(fetch-all-content competition-id))

View file

@ -2,17 +2,16 @@
(:require [net.cgrand.enlive-html :as html] (:require [net.cgrand.enlive-html :as html]
[clj-http.client :as http] [clj-http.client :as http]
[clojure.java.io :as io]) [clojure.java.io :as io])
(:use [clojure.string :only (split)] (:use [clojure.string :only (split)]))
[ldview.tasks.runner :only (@competition)]))
(defn url-action [action] (defn url-action [competition-id action]
(str "http://www.ludumdare.com/compo/ludum-dare-" competition "/?action=" action)) (str "http://www.ludumdare.com/compo/ludum-dare-" competition-id "/?action=" action))
(defn url-page [page] (defn url-page [competition-id page]
(str (url-action "preview") "&start=" page)) (str (url-action competition-id "preview") "&start=" page))
(defn url-entry [entry-id] (defn url-entry [competition-id entry-id]
(str (url-action "preview") "&uid=" entry-id)) (str (url-action competition-id "preview") "&uid=" entry-id))
; Helpers to fetch an html resource or save an url from the web into a ; Helpers to fetch an html resource or save an url from the web into a
; local file. These two functions will be needed to pull the content from ; local file. These two functions will be needed to pull the content from
@ -27,17 +26,17 @@
; The actual scraping process. We crawl through the entire content results ; The actual scraping process. We crawl through the entire content results
; and fetch the relevant information from the DOM. ; and fetch the relevant information from the DOM.
(defn number-of-pages [] (defn number-of-pages [competition-id]
(let [p (second (html/select (fetch-url (url-action "preview")) [:div#compo2 :> :p]))] (let [p (second (html/select (fetch-url (url-action competition-id "preview")) [:div#compo2 :> :p]))]
(read-string (html/text (last (butlast (html/select [p] [:a]))))))) (read-string (html/text (last (butlast (html/select [p] [:a])))))))
(defn entries-on-page [page] (defn entries-on-page [competition-id page]
(let [tds (html/select (fetch-url (url-page page)) #{[:.alt-1], [:.alt-2]}) (let [tds (html/select (fetch-url (url-page competition-id page)) #{[:.alt-1], [:.alt-2]})
links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))] links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))]
(map #(last (split %1 #"=")) links))) (map #(last (split %1 #"=")) links)))
(defn theme [] (defn theme [competition-id]
(let [p (html/select (fetch-url (url-action "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])] (let [p (html/select (fetch-url (url-action competition-id "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
(first (:content (first p))))) (first (:content (first p)))))
(defn links-on-entry [content] (defn links-on-entry [content]
@ -49,8 +48,8 @@
(defn format-entry-type [unformatted-type] (defn format-entry-type [unformatted-type]
(if (= unformatted-type "Jam Entry") "jam" "compo")) (if (= unformatted-type "Jam Entry") "jam" "compo"))
(defn entry-details [entry-id] (defn entry-details [competition-id entry-id]
(let [content (first (html/select (fetch-url (url-entry entry-id)) [:div#compo2])) (let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2]))
title-parts (first (html/texts (html/select [content] [:h3]))) title-parts (first (html/texts (html/select [content] [:h3])))
[title author unformatted-type] (split title-parts #" - ") [title author unformatted-type] (split title-parts #" - ")
links (links-on-entry content) links (links-on-entry content)