about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-10-10 15:53:30 +0100
committer Alan Pearce 2014-10-10 16:10:09 +0100
commit 062fcf6126c71601fa677bfab13bacdab78ec857 (patch)
tree f6e03079d392a3c00650a7453705104aa1d38d28
parent bd5a901a5081d01df16ab3a750fdfa403391b3d2 (diff)
download microformats-062fcf6126c71601fa677bfab13bacdab78ec857.tar.lz
microformats-062fcf6126c71601fa677bfab13bacdab78ec857.tar.zst
microformats-062fcf6126c71601fa677bfab13bacdab78ec857.zip
Add function to normalise a URL
-rw-r--r-- project.clj 3
-rw-r--r-- src/microformats/parser.clj 19
-rw-r--r-- test/microformats/parser_expectations.clj 6
3 files changed, 21 insertions, 7 deletions
diff --git a/project.clj b/project.clj
index 5b0d719..4223379 100644
--- a/project.clj
+++ b/project.clj
@@ -4,7 +4,8 @@
   :license {:name "MIT License"
             :url "http://opensource.org/licenses/MIT"}
   :dependencies [[org.clojure/clojure "1.6.0"]
-                 [enlive "1.1.5"]]
+                 [enlive "1.1.5"]
+                 [clojurewerkz/urly "1.0.0"]]
   :profiles {:dev {:dependencies [[expectations "2.0.9"]]}}
   :plugins [[lein-expectations "0.0.7"]]
   :main microformats.parser)
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index e3a244e..61c1941 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -2,7 +2,8 @@
   (:require [net.cgrand.enlive-html :as html]
             [clojure.zip :as z]
             [clojure.core.reducers :as r]
-            [clojure.string :as str]))
+            [clojure.string :as str]
+            [clojurewerkz.urly.core :as url]))
 
 (defmacro anacond
   [& clauses]
@@ -68,11 +69,17 @@
 (defn get-base-url
   "Find the base-url of a document."
   [document]
-  (-> document
-      (html/select [:head :> [:base (html/attr? :href)]])
-      first
-      :attrs
-      :href))
+  (or (-> document
+          (html/select [:head :> [:base (html/attr? :href)]])
+          first
+          :attrs
+          :href)
+      ""))
+
+(defn normalise-url
+  "Normalise a URL"
+  [root url]
+  (url/resolve (get-base-url root) url))
 
 (defn get-value-class
   "Get the value class of elements"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index 7344fde..0e72e10 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -54,6 +54,12 @@
 (expect "http://example.com"
         (get-base-url (snippet "<head><base href=\"http://example.com\"></head>")))
 
+(expect "/"
+        (normalise-url (snippet "<head></head>") "/"))
+
+(expect "http://example.com/"
+        (normalise-url (snippet "<head><base href=\"http://example.com\"></head>") "/"))
+
 (expect {:name '("Name")}
         (parse-p (snippet "<p class=\"p-name\"><span class=\"value\">Name</span></p>")))