Fix the models and make the scraping process work. Time to fish!

This commit is contained in:
Aaron Mueller 2014-09-28 13:30:37 +02:00
parent 276a36965f
commit 84114633ac
6 changed files with 35 additions and 18 deletions

View file

@ -10,6 +10,7 @@
[enlive "1.1.5"] [enlive "1.1.5"]
[image-resizer "0.1.6"] [image-resizer "0.1.6"]
[clj-http "1.0.0"] [clj-http "1.0.0"]
[mysql/mysql-connector-java "5.1.25"]
[korma "0.3.3"] [korma "0.3.3"]
[me.raynes/fs "1.4.6"]] [me.raynes/fs "1.4.6"]]
:main luduverse.core :main luduverse.core

View file

@ -19,7 +19,7 @@
(limit 1))))) (limit 1)))))
(defn create! [fields] (defn create! [fields]
(let [username (user/unique-user (:author fields)) (let [username (user/unique-user (:username fields))
competition-id (:id (competition/latest))] competition-id (:id (competition/latest))]
(if-not (exists? (:ld_uid fields)) (if-not (exists? (:ld_uid fields))
(insert entries (values {:ld_uid (:ld_uid fields) (insert entries (values {:ld_uid (:ld_uid fields)

View file

@ -5,14 +5,14 @@
(defn user-id [username] (defn user-id [username]
(first (select users (first (select users
(fields :id) (fields :id)
(where {:name username}) (where {:username username})
(limit 1)))) (limit 1))))
(defn exists? [username] (defn exists? [username]
(not (empty? (user-id username)))) (not (empty? (user-id username))))
(defn create! [username] (defn create! [username]
(insert users (values {:name username}))) (insert users (values {:username username})))
(defn unique-user [username] (defn unique-user [username]
(if-not (exists? username) (create! username)) (if-not (exists? username) (create! username))

View file

@ -1,18 +1,16 @@
(ns luduverse.db (ns luduverse.db
(:require [luduverse.db-schema :as schema] (:require [korma.core :refer :all]
[korma.core :refer :all]
[korma.db :refer [defdb mysql]])) [korma.db :refer [defdb mysql]]))
(defdb db (mysql {:db "luduverse" (defdb db (mysql {:db "luduverse"
:user "root" :user "root"
:password "dev" :password "dev"}))
:host "localhost"}))
(declare entities competitions users sources) (declare entities competitions users sources)
(defentity entries (defentity entries
(database db) (database db)
(belongs-to contests) (belongs-to competitions)
(belongs-to users) (belongs-to users)
(has-many sources)) (has-many sources))
@ -27,4 +25,3 @@
(defentity sources (defentity sources
(database db) (database db)
(belongs-to entities)) (belongs-to entities))

View file

@ -12,15 +12,19 @@
;; https://github.com/arg-games/ldview ;; https://github.com/arg-games/ldview
(defn base-path (defn base-path
([] (str (noir-io/resource-path) "img")) ;([] (str (noir-io/resource-path) "img"))
([] "resources/img")
([competition-id] (str (base-path) "/ld" competition-id "/"))) ([competition-id] (str (base-path) "/ld" competition-id "/")))
(defn create-file-structure [competition-id] (defn create-file-structure [competition-id]
(let [path (base-path competition-id)] (let [path (base-path competition-id)]
(fs/mkdirs (str path "/thumbs/")) (fs/mkdirs (str path "/thumbs/"))
(fs/mkdirs (str path "/fullscreen/")))) (fs/mkdirs (str path "/fullscreen/"))
(fs/mkdirs (str path "/raw/"))))
(defn image-name [competition-id folder entry-id number] (defn image-name [competition-id folder entry-id number]
;; FIXME: JPEGImageWriter is not thread save? Whats wrong here?
;; IIOException Invalid argument to native writeImage com.sun.imageio.plugins.jpeg.JPEGImageWriter.writeImage (JPEGImageWriter.java:-2)
(str (base-path competition-id) folder "/" entry-id "_" number ".png")) (str (base-path competition-id) folder "/" entry-id "_" number ".png"))
(defn to-square [file new-size] (defn to-square [file new-size]
@ -48,8 +52,9 @@
(io/copy bodystream (io/file target-file)))) (io/copy bodystream (io/file target-file))))
(defn save-images-for-entry [competition-id entry] (defn save-images-for-entry [competition-id entry]
(create-file-structure competition-id)
(doseq [image-url (:images entry)] (doseq [image-url (:images entry)]
(let [id (:ld_uid new-entry) (let [id (:ld_uid entry)
number (last (first (re-seq #"shot([0-9]+)" image-url))) number (last (first (re-seq #"shot([0-9]+)" image-url)))
raw-image-path (image-name competition-id "raw" id number)] raw-image-path (image-name competition-id "raw" id number)]
(if-not (fs/exists? raw-image-path) (if-not (fs/exists? raw-image-path)

View file

@ -1,7 +1,10 @@
(ns luduverse.ld-scraper (ns luduverse.ld-scraper
(:require [net.cgrand.enlive-html :as html] (:require [net.cgrand.enlive-html :as html]
[clojure.java.io :as io] [clojure.java.io :as io]
[clojure.string :refer [split trim]])) [clojure.string :refer [split trim]]
[luduverse.ld-images :as image]
[luduverse.db-models.entry :as entry]
[luduverse.db-models.competition :as competition]))
;; NOTE: This part is grabbed from the open source lib ;; NOTE: This part is grabbed from the open source lib
;; https://github.com/arg-games/ldview ;; https://github.com/arg-games/ldview
@ -56,19 +59,30 @@ and simply fetch it from Wikipedia. End of discussion :)"
(defn entry-details [competition-id entry-id] (defn entry-details [competition-id entry-id]
(let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2])) (let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2]))
title-parts (first (html/texts (html/select [content] [:h3]))) title-parts (first (html/texts (html/select [content] [:h3])))
[title author unformatted-type] (split title-parts #" - ") [title username unformatted-type] (split title-parts #" - ")
links (links-on-entry content) links (links-on-entry content)
description (html/text (nth (html/select [content] [:p]) 2)) description (html/text (nth (html/select [content] [:p]) 2))
images (images-on-entry content)] images (images-on-entry content)]
{:ld_uid entry-id {:ld_uid entry-id
:title title :title title
:description description :description description
:author author :username username
:type (format-entry-type unformatted-type) :type (format-entry-type unformatted-type)
:links links :links links
:images images})) :images images}))
;; Save all what we get ;; Save all what we got
(defn save-entry [competition-id entry] (defn save-entry [competition-id entry-id]
"Here we save the stuff ...") (let [entry (entry-details competition-id entry-id)]
(if-not (competition/exists? competition-id) (competition/create! competition-id (theme competition-id)))
(if-not (entry/exists? (:ld_uid entry)) (entry/create! entry))
(image/save-images-for-entry competition-id entry)))
(defn save-page [competition-id page]
(doall (pmap #(save-entry competition-id %) (entries-on-page competition-id page)))
true)
(defn save-competition [competition-id]
(doseq [page (range (number-of-pages competition-id))] (save-page competition-id page))
true)