diff options
author | Alan Pearce | 2014-10-10 15:53:30 +0100 |
---|---|---|
committer | Alan Pearce | 2014-10-10 16:10:09 +0100 |
commit | 062fcf6126c71601fa677bfab13bacdab78ec857 (patch) | |
tree | f6e03079d392a3c00650a7453705104aa1d38d28 | |
parent | bd5a901a5081d01df16ab3a750fdfa403391b3d2 (diff) | |
download | microformats-062fcf6126c71601fa677bfab13bacdab78ec857.tar.lz microformats-062fcf6126c71601fa677bfab13bacdab78ec857.tar.zst microformats-062fcf6126c71601fa677bfab13bacdab78ec857.zip |
Add function to normalise a URL
-rw-r--r-- | project.clj | 3 | ||||
-rw-r--r-- | src/microformats/parser.clj | 19 | ||||
-rw-r--r-- | test/microformats/parser_expectations.clj | 6 |
3 files changed, 21 insertions, 7 deletions
```diff
diff --git a/project.clj b/project.clj
index 5b0d719..4223379 100644
--- a/project.clj
+++ b/project.clj
@@ -4,7 +4,8 @@
   :license {:name "MIT License"
             :url "http://opensource.org/licenses/MIT"}
   :dependencies [[org.clojure/clojure "1.6.0"]
-                 [enlive "1.1.5"]]
+                 [enlive "1.1.5"]
+                 [clojurewerkz/urly "1.0.0"]]
   :profiles {:dev {:dependencies [[expectations "2.0.9"]]}}
   :plugins [[lein-expectations "0.0.7"]]
   :main microformats.parser)
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index e3a244e..61c1941 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -2,7 +2,8 @@
   (:require [net.cgrand.enlive-html :as html]
             [clojure.zip :as z]
             [clojure.core.reducers :as r]
-            [clojure.string :as str]))
+            [clojure.string :as str]
+            [clojurewerkz.urly.core :as url]))
 
 (defmacro anacond
   [& clauses]
@@ -68,11 +69,17 @@
 (defn get-base-url
   "Find the base-url of a document."
   [document]
-  (-> document
-      (html/select [:head :> [:base (html/attr? :href)]])
-      first
-      :attrs
-      :href))
+  (or (-> document
+          (html/select [:head :> [:base (html/attr? :href)]])
+          first
+          :attrs
+          :href)
+      ""))
+
+(defn normalise-url
+  "Normalise a URL"
+  [root url]
+  (url/resolve (get-base-url root) url))
 
 (defn get-value-class
   "Get the value class of elements"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index 7344fde..0e72e10 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -54,6 +54,12 @@
 (expect "http://example.com"
         (get-base-url (snippet "<head><base href=\"http://example.com\"></head>")))
 
+(expect "/"
+        (normalise-url (snippet "<head></head>") "/"))
+
+(expect "http://example.com/"
+        (normalise-url (snippet "<head><base href=\"http://example.com\"></head>") "/"))
+
 (expect {:name '("Name")}
         (parse-p (snippet "<p class=\"p-name\"><span class=\"value\">Name</span></p>")))
 
```