Fix the nasty competition-id issue and make it run again
This commit is contained in:
parent
03ec7844e0
commit
8066186397
3 changed files with 26 additions and 33 deletions
|
@ -1,11 +1,10 @@
|
|||
(ns ldview.models.competition
|
||||
(:use korma.core
|
||||
[ldview.models.db]
|
||||
[ldview.tasks.scrape :as configuration :only [competition]]))
|
||||
[ldview.models.db]))
|
||||
|
||||
(defn latest []
|
||||
(first (select competitions
|
||||
(where {:number configuration/competition}) ; FIXME: Make it dynamic
|
||||
(order :number :DESC)
|
||||
(limit 1))))
|
||||
|
||||
(defn exists? [number]
|
||||
|
|
|
@ -6,10 +6,6 @@
|
|||
[ldview.models.competition :as competition]
|
||||
[ldview.models.schema :as schema]))
|
||||
|
||||
;; TODO: This is ugly and not the right way to do it. This should be cleaned up
|
||||
;; so the functions do not have side effects
|
||||
(def competition (atom 0))
|
||||
|
||||
(defn cleanup! []
|
||||
(if (fs/exists? images/base-path)
|
||||
(fs/delete-dir images/base-path))
|
||||
|
@ -29,15 +25,14 @@
|
|||
(images/sourceimage->thumb raw-image-path (images/image-name "thumbs" id number))))
|
||||
(:images new-entry))))
|
||||
|
||||
(defn fetch-all-content []
|
||||
(let [pages (range 1)] ;(scrape/number-of-pages)]
|
||||
(for [page pages]
|
||||
(for [ld-uid (scrape/entries-on-page (inc page))]
|
||||
; TODO: Make it multithreaded
|
||||
(defn fetch-all-content [competition-id]
|
||||
(let [pages (range 5)] ;(scrape/number-of-pages competition-id)]
|
||||
(doseq [page pages]
|
||||
(doseq [ld-uid (scrape/entries-on-page competition-id (inc page))]
|
||||
(if-not (entry/exists? ld-uid)
|
||||
(save-entry (scrape/entry-details ld-uid)))))))
|
||||
|
||||
(defn load-competition [id]
|
||||
(swap! competition id)
|
||||
(competition/create! id (scrape/theme))
|
||||
(fetch-all-content))
|
||||
(save-entry! (scrape/entry-details competition-id ld-uid)))))))
|
||||
|
||||
(defn load-competition [competition-id]
|
||||
(competition/create! competition-id (scrape/theme competition-id))
|
||||
(fetch-all-content competition-id))
|
||||
|
|
|
@ -2,17 +2,16 @@
|
|||
(:require [net.cgrand.enlive-html :as html]
|
||||
[clj-http.client :as http]
|
||||
[clojure.java.io :as io])
|
||||
(:use [clojure.string :only (split)]
|
||||
[ldview.tasks.runner :only (@competition)]))
|
||||
(:use [clojure.string :only (split)]))
|
||||
|
||||
(defn url-action [action]
|
||||
(str "http://www.ludumdare.com/compo/ludum-dare-" competition "/?action=" action))
|
||||
(defn url-action [competition-id action]
|
||||
(str "http://www.ludumdare.com/compo/ludum-dare-" competition-id "/?action=" action))
|
||||
|
||||
(defn url-page [page]
|
||||
(str (url-action "preview") "&start=" page))
|
||||
(defn url-page [competition-id page]
|
||||
(str (url-action competition-id "preview") "&start=" page))
|
||||
|
||||
(defn url-entry [entry-id]
|
||||
(str (url-action "preview") "&uid=" entry-id))
|
||||
(defn url-entry [competition-id entry-id]
|
||||
(str (url-action competition-id "preview") "&uid=" entry-id))
|
||||
|
||||
; Helpers to fetch an html resource or save an url from the web into a
|
||||
; local file. These two functions will be needed to pull the content from
|
||||
|
@ -27,17 +26,17 @@
|
|||
|
||||
; The actual scraping process. We crawl through the entire content results
|
||||
; and fetch the relevant information from the DOM.
|
||||
(defn number-of-pages []
|
||||
(let [p (second (html/select (fetch-url (url-action "preview")) [:div#compo2 :> :p]))]
|
||||
(defn number-of-pages [competition-id]
|
||||
(let [p (second (html/select (fetch-url (url-action competition-id "preview")) [:div#compo2 :> :p]))]
|
||||
(read-string (html/text (last (butlast (html/select [p] [:a])))))))
|
||||
|
||||
(defn entries-on-page [page]
|
||||
(let [tds (html/select (fetch-url (url-page page)) #{[:.alt-1], [:.alt-2]})
|
||||
(defn entries-on-page [competition-id page]
|
||||
(let [tds (html/select (fetch-url (url-page competition-id page)) #{[:.alt-1], [:.alt-2]})
|
||||
links (map #(:href %1) (map #(:attrs (first (html/select [%1] [:a]))) tds))]
|
||||
(map #(last (split %1 #"=")) links)))
|
||||
|
||||
(defn theme []
|
||||
(let [p (html/select (fetch-url (url-action "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
|
||||
(defn theme [competition-id]
|
||||
(let [p (html/select (fetch-url (url-action competition-id "preview")) [:div#content :> :div.post :> :div.entry :> :p html/first-child :a])]
|
||||
(first (:content (first p)))))
|
||||
|
||||
(defn links-on-entry [content]
|
||||
|
@ -49,8 +48,8 @@
|
|||
(defn format-entry-type [unformatted-type]
|
||||
(if (= unformatted-type "Jam Entry") "jam" "compo"))
|
||||
|
||||
(defn entry-details [entry-id]
|
||||
(let [content (first (html/select (fetch-url (url-entry entry-id)) [:div#compo2]))
|
||||
(defn entry-details [competition-id entry-id]
|
||||
(let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2]))
|
||||
title-parts (first (html/texts (html/select [content] [:h3])))
|
||||
[title author unformatted-type] (split title-parts #" - ")
|
||||
links (links-on-entry content)
|
||||
|
|
Loading…
Reference in a new issue