From 062fcf6126c71601fa677bfab13bacdab78ec857 Mon Sep 17 00:00:00 2001
From: Alan Pearce
Date: Fri, 10 Oct 2014 15:53:30 +0100
Subject: Add function to normalise a URL

---
 src/microformats/parser.clj | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index e3a244e..61c1941 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -2,7 +2,8 @@
   (:require [net.cgrand.enlive-html :as html]
             [clojure.zip :as z]
             [clojure.core.reducers :as r]
-            [clojure.string :as str]))
+            [clojure.string :as str]
+            [clojurewerkz.urly.core :as url]))
 
 (defmacro anacond
   [& clauses]
@@ -68,11 +69,17 @@
 (defn get-base-url
   "Find the base-url of a document."
   [document]
-  (-> document
-      (html/select [:head :> [:base (html/attr? :href)]])
-      first
-      :attrs
-      :href))
+  (or (-> document
+          (html/select [:head :> [:base (html/attr? :href)]])
+          first
+          :attrs
+          :href)
+      ""))
+
+(defn normalise-url
+  "Normalise a URL"
+  [root url]
+  (url/resolve (get-base-url root) url))
 
 (defn get-value-class
   "Get the value class of elements"
-- 
cgit 1.4.1