about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-10-19 13:20:46 +0100
committer Alan Pearce 2014-10-19 13:20:46 +0100
commit a4735ff01cdb3ac784820b0510bf07a33a561f1b (patch)
tree 79e382333d9192939bbd62d14745076ea208a784
parent fe2e6c08a4f8d42317fe8e97a62e061209d4ed4d (diff)
download microformats-a4735ff01cdb3ac784820b0510bf07a33a561f1b.tar.lz
microformats-a4735ff01cdb3ac784820b0510bf07a33a561f1b.tar.zst
microformats-a4735ff01cdb3ac784820b0510bf07a33a561f1b.zip
Allow base-url to be overridden by parse argument
-rw-r--r-- src/microformats/parser.clj 22
-rw-r--r-- test/microformats/parser_expectations.clj 6
2 files changed, 25 insertions, 3 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 21fcc26..9137eaa 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -387,8 +387,24 @@
   [locs]
   (or (apply merge-with into (map parse-rel (select-rels locs))) {}))
 
+(defprotocol node
+  (to-node [html]))
+
+(extend-protocol node
+  String
+  (to-node [html]
+    (some->> html str/trim html/html-snippet)))
+
+(extend-protocol node
+  clojure.lang.LazySeq
+  (to-node [html]
+    html))
+
 (defn parse
   "Parse a HTML string with microformats"
-  [html]
-  (let [document (some->> html str/trim html/html-snippet with-base-url (map z/xml-zip) first)]
-    {:items (some-> document walk) :rels (parse-rels document)}))
+  ([html]
+     (let [document (some->> html to-node with-base-url)]
+       (parse document (get-base-url document))))
+  ([html base-url]
+     (let [document (some->> html to-node first (with-base-url base-url) z/xml-zip)]
+       {:items (some-> document walk) :rels (parse-rels document)})))
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index f8c7bfb..dd7b335 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -305,6 +305,12 @@
 <div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div>
 </body></html>"))
 
+(expect-focused {:items '({:type ("h-card") :properties {:name ("Example User")
+                                                 :photo ("http://not-example.com/me.png")}}) :rels {}}
+        (parse "<html><head><base href=\"http://example.com/\"></head><body>
+<div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div>
+</body></html>" "http://not-example.com/"))
+
 (expect {:items '({:type ("h-card"),
                    :properties {:photo ("http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg"),
                                 :url ("http://blog.lizardwrangler.com/" "https://twitter.com/MitchellBaker"),