Fix the models and make the scraping process work. Time to fish!

This commit is contained in:
Aaron Mueller 2014-09-28 13:30:37 +02:00
parent 276a36965f
commit 84114633ac
6 changed files with 35 additions and 18 deletions

View file

@ -10,6 +10,7 @@
[enlive "1.1.5"]
[image-resizer "0.1.6"]
[clj-http "1.0.0"]
[mysql/mysql-connector-java "5.1.25"]
[korma "0.3.3"]
[me.raynes/fs "1.4.6"]]
:main luduverse.core

View file

@ -19,7 +19,7 @@
(limit 1)))))
(defn create! [fields]
(let [username (user/unique-user (:author fields))
(let [username (user/unique-user (:username fields))
competition-id (:id (competition/latest))]
(if-not (exists? (:ld_uid fields))
(insert entries (values {:ld_uid (:ld_uid fields)

View file

@ -5,14 +5,14 @@
(defn user-id [username]
(first (select users
(fields :id)
(where {:name username})
(where {:username username})
(limit 1))))
(defn exists? [username]
(not (empty? (user-id username))))
(defn create! [username]
(insert users (values {:name username})))
(insert users (values {:username username})))
(defn unique-user [username]
(if-not (exists? username) (create! username))

View file

@ -1,18 +1,16 @@
(ns luduverse.db
(:require [luduverse.db-schema :as schema]
[korma.core :refer :all]
(:require [korma.core :refer :all]
[korma.db :refer [defdb mysql]]))
(defdb db (mysql {:db "luduverse"
:user "root"
:password "dev"
:host "localhost"}))
:password "dev"}))
(declare entities competitions users sources)
(defentity entries
(database db)
(belongs-to contests)
(belongs-to competitions)
(belongs-to users)
(has-many sources))
@ -27,4 +25,3 @@
(defentity sources
(database db)
(belongs-to entities))

View file

@ -12,15 +12,19 @@
;; https://github.com/arg-games/ldview
(defn base-path
([] (str (noir-io/resource-path) "img"))
;([] (str (noir-io/resource-path) "img"))
([] "resources/img")
([competition-id] (str (base-path) "/ld" competition-id "/")))
(defn create-file-structure [competition-id]
(let [path (base-path competition-id)]
(fs/mkdirs (str path "/thumbs/"))
(fs/mkdirs (str path "/fullscreen/"))))
(fs/mkdirs (str path "/fullscreen/"))
(fs/mkdirs (str path "/raw/"))))
(defn image-name [competition-id folder entry-id number]
;; FIXME: JPEGImageWriter is not thread save? Whats wrong here?
;; IIOException Invalid argument to native writeImage com.sun.imageio.plugins.jpeg.JPEGImageWriter.writeImage (JPEGImageWriter.java:-2)
(str (base-path competition-id) folder "/" entry-id "_" number ".png"))
(defn to-square [file new-size]
@ -48,8 +52,9 @@
(io/copy bodystream (io/file target-file))))
(defn save-images-for-entry [competition-id entry]
(create-file-structure competition-id)
(doseq [image-url (:images entry)]
(let [id (:ld_uid new-entry)
(let [id (:ld_uid entry)
number (last (first (re-seq #"shot([0-9]+)" image-url)))
raw-image-path (image-name competition-id "raw" id number)]
(if-not (fs/exists? raw-image-path)

View file

@ -1,7 +1,10 @@
(ns luduverse.ld-scraper
(:require [net.cgrand.enlive-html :as html]
[clojure.java.io :as io]
[clojure.string :refer [split trim]]))
[clojure.string :refer [split trim]]
[luduverse.ld-images :as image]
[luduverse.db-models.entry :as entry]
[luduverse.db-models.competition :as competition]))
;; NOTE: This part is grabbed from the open source lib
;; https://github.com/arg-games/ldview
@ -56,19 +59,30 @@ and simply fetch it from Wikipedia. End of discussion :)"
(defn entry-details [competition-id entry-id]
(let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2]))
title-parts (first (html/texts (html/select [content] [:h3])))
[title author unformatted-type] (split title-parts #" - ")
[title username unformatted-type] (split title-parts #" - ")
links (links-on-entry content)
description (html/text (nth (html/select [content] [:p]) 2))
images (images-on-entry content)]
{:ld_uid entry-id
:title title
:description description
:author author
:username username
:type (format-entry-type unformatted-type)
:links links
:images images}))
;; Save all what we get
(defn save-entry [competition-id entry]
"Here we save the stuff ...")
;; Save all what we got
(defn save-entry [competition-id entry-id]
(let [entry (entry-details competition-id entry-id)]
(if-not (competition/exists? competition-id) (competition/create! competition-id (theme competition-id)))
(if-not (entry/exists? (:ld_uid entry)) (entry/create! entry))
(image/save-images-for-entry competition-id entry)))
(defn save-page [competition-id page]
(doall (pmap #(save-entry competition-id %) (entries-on-page competition-id page)))
true)
(defn save-competition [competition-id]
(doseq [page (range (number-of-pages competition-id))] (save-page competition-id page))
true)