fix the nasty competition-id issue and make it run again

This commit is contained in:
Aaron Mueller 2014-01-30 21:02:25 +01:00
parent 03ec7844e0
commit 8066186397
3 changed files with 26 additions and 33 deletions

View file

@ -1,11 +1,10 @@
(ns ldview.models.competition
(:use korma.core
[ldview.models.db]
[ldview.tasks.scrape :as configuration :only [competition]]))
[ldview.models.db]))
(defn latest []
(first (select competitions
(where {:number configuration/competition}) ; FIXME: Make it dynamic
(order :number :DESC)
(limit 1))))
(defn exists? [number]

View file

@ -6,10 +6,6 @@
[ldview.models.competition :as competition]
[ldview.models.schema :as schema]))
;; TODO: This is ugly and not the right way to do it. This should be cleaned up
;; so the functions do not have side effects
(def competition (atom 0))
(defn cleanup! []
(if (fs/exists? images/base-path)
(fs/delete-dir images/base-path))
@ -29,15 +25,14 @@
(images/sourceimage->thumb raw-image-path (images/image-name "thumbs" id number))))
(:images new-entry))))
(defn fetch-all-content []
(let [pages (range 1)] ;(scrape/number-of-pages)]
(for [page pages]
(for [ld-uid (scrape/entries-on-page (inc page))]
; TODO: Make it multithreaded
(defn fetch-all-content [competition-id]
(let [pages (range 5)] ;(scrape/number-of-pages competition-id)]
(doseq [page pages]
(doseq [ld-uid (scrape/entries-on-page competition-id (inc page))]
(if-not (entry/exists? ld-uid)
(save-entry (scrape/entry-details ld-uid)))))))
(defn load-competition [id]
(swap! competition id)
(competition/create! id (scrape/theme))
(fetch-all-content))
(save-entry! (scrape/entry-details competition-id ld-uid)))))))
(defn load-competition
  "Scrape the theme for `competition-id`, create the competition record with
  it, and then fetch all content for that competition."
  [competition-id]
  ;; The theme is scraped first so it can be passed to create!.
  (let [scraped-theme (scrape/theme competition-id)]
    (competition/create! competition-id scraped-theme)
    (fetch-all-content competition-id)))

View file

@ -2,17 +2,16 @@
(:require [net.cgrand.enlive-html :as html]
[clj-http.client :as http]
[clojure.java.io :as io])
(:use [clojure.string :only (split)]
[ldview.tasks.runner :only (@competition)]))
(:use [clojure.string :only (split)]))
(defn url-action [action]
(str "http://www.ludumdare.com/compo/ludum-dare-" competition "/?action=" action))
(defn url-action
  "Build the ludumdare.com URL that requests `action` for the competition
  identified by `competition-id`."
  [competition-id action]
  (let [competition-base (str "http://www.ludumdare.com/compo/ludum-dare-" competition-id)]
    (str competition-base "/?action=" action)))
(defn url-page [page]
(str (url-action "preview") "&start=" page))
(defn url-page
  "Build the preview URL of `competition-id` with `page` appended as the
  `start` query parameter."
  [competition-id page]
  (let [preview-url (url-action competition-id "preview")]
    (str preview-url "&start=" page)))
(defn url-entry [entry-id]
(str (url-action "preview") "&uid=" entry-id))
(defn url-entry
  "Build the preview URL of `competition-id` with `entry-id` appended as the
  `uid` query parameter."
  [competition-id entry-id]
  (let [preview-url (url-action competition-id "preview")]
    (str preview-url "&uid=" entry-id)))
; Helpers to fetch an HTML resource or save a URL from the web into a
; local file. These two functions will be needed to pull the content from
@ -27,17 +26,17 @@
; The actual scraping process. We crawl through the entire content results
; and fetch the relevant information from the DOM.
(defn number-of-pages []
(let [p (second (html/select (fetch-url (url-action "preview")) [:div#compo2 :> :p]))]
(defn number-of-pages
  "Return the number of result pages in the preview listing of
  `competition-id`.

  Fetches the preview page, takes the second paragraph directly under
  div#compo2 and parses the text of its second-to-last link as an integer.
  Throws NumberFormatException when that text is not a plain integer."
  [competition-id]
  (let [p (second (html/select (fetch-url (url-action competition-id "preview")) [:div#compo2 :> :p]))
        label (html/text (last (butlast (html/select [p] [:a]))))]
    ;; The label comes from a remote page. Parse it strictly as a number
    ;; instead of passing it to the Clojure reader: `read-string` can
    ;; evaluate `#=(...)` forms embedded in untrusted text.
    (Long/parseLong (.trim ^String label))))
(defn entries-on-page [page]
(let [tds (html/select (fetch-url (url-page page)) #{[:.alt-1], [:.alt-2]})
(defn entries-on-page
  "Return the entry ids linked from listing page `page` of `competition-id`.

  Selects every element with class alt-1 or alt-2 on the fetched page, takes
  the href of the first link inside each, and keeps the part after the last
  `=` in that href."
  [competition-id page]
  (let [document (fetch-url (url-page competition-id page))
        cells (html/select document #{[:.alt-1], [:.alt-2]})
        hrefs (for [cell cells]
                (-> (html/select [cell] [:a]) first :attrs :href))]
    (map (fn [href] (last (split href #"="))) hrefs)))
(defn theme []
(let [p (html/select (fetch-url (url-action "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
(defn theme
  "Return the first content node of the first link matched by the selector
  below on the preview page of `competition-id`."
  [competition-id]
  (let [document (fetch-url (url-action competition-id "preview"))
        anchors (html/select document [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
    (-> anchors first :content first)))
(defn links-on-entry [content]
@ -49,8 +48,8 @@
(defn format-entry-type
  "Map the scraped entry type label to its short form: \"Jam Entry\" becomes
  \"jam\"; every other value becomes \"compo\"."
  [unformatted-type]
  (case unformatted-type
    "Jam Entry" "jam"
    "compo"))
(defn entry-details [entry-id]
(let [content (first (html/select (fetch-url (url-entry entry-id)) [:div#compo2]))
(defn entry-details [competition-id entry-id]
(let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2]))
title-parts (first (html/texts (html/select [content] [:h3])))
[title author unformatted-type] (split title-parts #" - ")
links (links-on-entry content)