Fix the models and make the scraping process work. Time to fish!
This commit is contained in:
parent
276a36965f
commit
84114633ac
6 changed files with 35 additions and 18 deletions
|
@ -10,6 +10,7 @@
|
|||
[enlive "1.1.5"]
|
||||
[image-resizer "0.1.6"]
|
||||
[clj-http "1.0.0"]
|
||||
[mysql/mysql-connector-java "5.1.25"]
|
||||
[korma "0.3.3"]
|
||||
[me.raynes/fs "1.4.6"]]
|
||||
:main luduverse.core
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
(limit 1)))))
|
||||
|
||||
(defn create! [fields]
|
||||
(let [username (user/unique-user (:author fields))
|
||||
(let [username (user/unique-user (:username fields))
|
||||
competition-id (:id (competition/latest))]
|
||||
(if-not (exists? (:ld_uid fields))
|
||||
(insert entries (values {:ld_uid (:ld_uid fields)
|
||||
|
|
|
@ -5,14 +5,14 @@
|
|||
(defn user-id [username]
|
||||
(first (select users
|
||||
(fields :id)
|
||||
(where {:name username})
|
||||
(where {:username username})
|
||||
(limit 1))))
|
||||
|
||||
(defn exists? [username]
|
||||
(not (empty? (user-id username))))
|
||||
|
||||
(defn create! [username]
|
||||
(insert users (values {:name username})))
|
||||
(insert users (values {:username username})))
|
||||
|
||||
(defn unique-user [username]
|
||||
(if-not (exists? username) (create! username))
|
||||
|
|
|
@ -1,18 +1,16 @@
|
|||
(ns luduverse.db
|
||||
(:require [luduverse.db-schema :as schema]
|
||||
[korma.core :refer :all]
|
||||
(:require [korma.core :refer :all]
|
||||
[korma.db :refer [defdb mysql]]))
|
||||
|
||||
(defdb db (mysql {:db "luduverse"
|
||||
:user "root"
|
||||
:password "dev"
|
||||
:host "localhost"}))
|
||||
:password "dev"}))
|
||||
|
||||
(declare entities competitions users sources)
|
||||
|
||||
(defentity entries
|
||||
(database db)
|
||||
(belongs-to contests)
|
||||
(belongs-to competitions)
|
||||
(belongs-to users)
|
||||
(has-many sources))
|
||||
|
||||
|
@ -27,4 +25,3 @@
|
|||
(defentity sources
|
||||
(database db)
|
||||
(belongs-to entities))
|
||||
|
||||
|
|
|
@ -12,15 +12,19 @@
|
|||
;; https://github.com/arg-games/ldview
|
||||
|
||||
(defn base-path
|
||||
([] (str (noir-io/resource-path) "img"))
|
||||
;([] (str (noir-io/resource-path) "img"))
|
||||
([] "resources/img")
|
||||
([competition-id] (str (base-path) "/ld" competition-id "/")))
|
||||
|
||||
(defn create-file-structure [competition-id]
|
||||
(let [path (base-path competition-id)]
|
||||
(fs/mkdirs (str path "/thumbs/"))
|
||||
(fs/mkdirs (str path "/fullscreen/"))))
|
||||
(fs/mkdirs (str path "/fullscreen/"))
|
||||
(fs/mkdirs (str path "/raw/"))))
|
||||
|
||||
(defn image-name [competition-id folder entry-id number]
|
||||
;; FIXME: JPEGImageWriter is not thread save? Whats wrong here?
|
||||
;; IIOException Invalid argument to native writeImage com.sun.imageio.plugins.jpeg.JPEGImageWriter.writeImage (JPEGImageWriter.java:-2)
|
||||
(str (base-path competition-id) folder "/" entry-id "_" number ".png"))
|
||||
|
||||
(defn to-square [file new-size]
|
||||
|
@ -48,8 +52,9 @@
|
|||
(io/copy bodystream (io/file target-file))))
|
||||
|
||||
(defn save-images-for-entry [competition-id entry]
|
||||
(create-file-structure competition-id)
|
||||
(doseq [image-url (:images entry)]
|
||||
(let [id (:ld_uid new-entry)
|
||||
(let [id (:ld_uid entry)
|
||||
number (last (first (re-seq #"shot([0-9]+)" image-url)))
|
||||
raw-image-path (image-name competition-id "raw" id number)]
|
||||
(if-not (fs/exists? raw-image-path)
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
(ns luduverse.ld-scraper
|
||||
(:require [net.cgrand.enlive-html :as html]
|
||||
[clojure.java.io :as io]
|
||||
[clojure.string :refer [split trim]]))
|
||||
[clojure.string :refer [split trim]]
|
||||
[luduverse.ld-images :as image]
|
||||
[luduverse.db-models.entry :as entry]
|
||||
[luduverse.db-models.competition :as competition]))
|
||||
|
||||
;; NOTE: This part is grabbed from the open source lib
|
||||
;; https://github.com/arg-games/ldview
|
||||
|
@ -56,19 +59,30 @@ and simply fetch it from Wikipedia. End of discussion :)"
|
|||
(defn entry-details [competition-id entry-id]
|
||||
(let [content (first (html/select (fetch-url (url-entry competition-id entry-id)) [:div#compo2]))
|
||||
title-parts (first (html/texts (html/select [content] [:h3])))
|
||||
[title author unformatted-type] (split title-parts #" - ")
|
||||
[title username unformatted-type] (split title-parts #" - ")
|
||||
links (links-on-entry content)
|
||||
description (html/text (nth (html/select [content] [:p]) 2))
|
||||
images (images-on-entry content)]
|
||||
{:ld_uid entry-id
|
||||
:title title
|
||||
:description description
|
||||
:author author
|
||||
:username username
|
||||
:type (format-entry-type unformatted-type)
|
||||
:links links
|
||||
:images images}))
|
||||
|
||||
|
||||
;; Save all what we get
|
||||
(defn save-entry [competition-id entry]
|
||||
"Here we save the stuff ...")
|
||||
;; Save all what we got
|
||||
(defn save-entry [competition-id entry-id]
|
||||
(let [entry (entry-details competition-id entry-id)]
|
||||
(if-not (competition/exists? competition-id) (competition/create! competition-id (theme competition-id)))
|
||||
(if-not (entry/exists? (:ld_uid entry)) (entry/create! entry))
|
||||
(image/save-images-for-entry competition-id entry)))
|
||||
|
||||
(defn save-page [competition-id page]
|
||||
(doall (pmap #(save-entry competition-id %) (entries-on-page competition-id page)))
|
||||
true)
|
||||
|
||||
(defn save-competition [competition-id]
|
||||
(doseq [page (range (number-of-pages competition-id))] (save-page competition-id page))
|
||||
true)
|
||||
|
|
Loading…
Reference in a new issue