about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-10-10 16:23:32 +0100
committer Alan Pearce 2014-10-10 16:23:32 +0100
commit 3213c0c46a152e709772f0818a9281f5ce0e1988 (patch)
tree 8aeba85dd1a45b6ae06a01497e993bcd52b754ca
parent 49ed30908feb17d4ca6ea5dd2536b7cf79e395d1 (diff)
download microformats-3213c0c46a152e709772f0818a9281f5ce0e1988.tar.lz
microformats-3213c0c46a152e709772f0818a9281f5ce0e1988.tar.zst
microformats-3213c0c46a152e709772f0818a9281f5ce0e1988.zip
Normalise parsed URLs
-rw-r--r-- src/microformats/parser.clj 47
-rw-r--r-- test/microformats/parser_expectations.clj 25
2 files changed, 50 insertions, 22 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index e29ca9b..0b26372 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -146,10 +146,10 @@
   (let [el (z/node loc)]
     (str/trim (or (find-value-class el)
                   (case (:tag el)
-                    :a (-> el :attrs :href)
-                    :area (-> el :attrs :href)
-                    :img (-> el :attrs :src)
-                    :object (-> el :attrs :data)
+                    :a (normalise-url (z/root loc) (-> el :attrs :href))
+                    :area (normalise-url (z/root loc) (-> el :attrs :href))
+                    :img (normalise-url (z/root loc) (-> el :attrs :src))
+                    :object (normalise-url (z/root loc) (-> el :attrs :data))
                     (get-p-value loc))
                   (node-to-text (:content el))
                   ""))))
@@ -246,27 +246,30 @@
 (defn- parse-implied-url
   [loc]
   (let [element (z/node loc)]
-    (case (:tag element)
-      :a (-> element :attrs :href)
-      (if-let [% (first (html/select element [html/root :> [:a (html/attr? :href) html/only-of-type (html/but-node (html/attr-contains :class "h-"))]]))]
-        (-> % :attrs :href)))))
+    (some->>
+     (case (:tag element)
+       :a (-> element :attrs :href)
+       (if-let [% (first (html/select element [html/root :> [:a (html/attr? :href) html/only-of-type (html/but-node (html/attr-contains :class "h-"))]]))]
+         (-> % :attrs :href)))
+     (normalise-url (z/root loc)))))
 
 (defn- parse-implied-photo
   [loc]
   (let [element (z/node loc)]
-    (case (:tag element)
-      :img (-> element :attrs :src)
-      :object (-> element :attrs :data)
-      (anacond
-       (first (html/select element [html/root :> [:img (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
-       (-> % :attrs :src)
-       (first (html/select element [html/root :> [:object (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
-       (-> % :attrs :data)
-       (first (html/select element [html/root :> html/only-child :> [:img (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
-       (-> % :attrs :src)
-       (first (html/select element [html/root :> html/only-child :> [:object (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
-       (-> % :attrs :data)
-       ))))
+    (some->>
+     (case (:tag element)
+       :img (-> element :attrs :src)
+       :object (-> element :attrs :data)
+       (anacond
+        (first (html/select element [html/root :> [:img (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
+        (-> % :attrs :src)
+        (first (html/select element [html/root :> [:object (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
+        (-> % :attrs :data)
+        (first (html/select element [html/root :> html/only-child :> [:img (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
+        (-> % :attrs :src)
+        (first (html/select element [html/root :> html/only-child :> [:object (html/but-node (html/attr-contains :class "h-")) html/only-of-type]]))
+        (-> % :attrs :data)))
+     (normalise-url (z/root loc)))))
 
 (def empty-ish
   #(not (str/blank? (first (second %)))))
@@ -322,7 +325,7 @@
        z/node
        element-to-rels
        (map keyword)
-       (map #(hash-map % [(-> loc z/node :attrs :href)]))
+       (map #(hash-map % [(normalise-url (z/root loc) (-> loc z/node :attrs :href))]))
        (into {})))
 
 (defn select-rels
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index 6ce942e..92e4448 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -157,6 +157,11 @@
 (expect {:author '("http://example.com/a")}
         (parse-rels (snippets "<a rel=\"author\" href=\"http://example.com/a\">author a</a>")))
 
+(expect {:author '("http://example.com/a")}
+        (parse-rels (snippets "
+<html><head><base href=\"http://example.com\"><body>
+<a rel=\"author\" href=\"/a\">author a</a>")))
+
 (expect {:author '("http://example.com/a" "http://example.com/b")}
         (parse-rels (snippets "<a rel=\"author\" href=\"http://example.com/a\">author a</a>
 <a rel=\"author\" href=\"http://example.com/b\">author b</a>")))
@@ -276,3 +281,23 @@
 <span class=\"p-name\">John Doe</span>
 <span class=\"p-org h-card h-org\">Example</span>
 </div>"))
+
+(expect {:items '({:type ("h-card") :properties {:name ("Example User") :url ("http://example.com/")}}) :rels {}}
+        (parse "<html><head><base href=\"http://example.com\"></head><body>
+<div class=\"h-card\"><a class=\"u-url\" href=\"/\">Example User</a></div></body></html>"))
+
+(expect {:items '({:type ("h-card") :properties {:name ("Example User") :url ("http://example.com/")}}) :rels {}}
+        (parse "<html><head><base href=\"http://example.com\"></head><body>
+<div class=\"h-card\"><a href=\"/\">Example User</a></div></body></html>"))
+
+(expect {:items '({:type ("h-card") :properties {:name ("Example User")
+                                                 :photo ("http://example.com/me.png")}}) :rels {}}
+        (parse "<html><head><base href=\"http://example.com/\"></head><body>
+<div class=\"h-card\"><img alt=\"Example User\" src=\"me.png\"></div>
+</body></html>"))
+
+(expect {:items '({:type ("h-card") :properties {:name ("Example User")
+                                                 :photo ("http://example.com/me.png")}}) :rels {}}
+        (parse "<html><head><base href=\"http://example.com/\"></head><body>
+<div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div>
+</body></html>"))