about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-10-19 10:06:28 +0100
committer Alan Pearce 2014-10-19 10:06:28 +0100
commit 8899c5515aa3cad313250c82cacc895f8cd89e2f (patch)
tree 40252091923b602a1cc3974c6ed38105feb18181
parent dc1b88d86c0feffd15489a3f51681a9942d107c0 (diff)
download microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.lz
microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.zst
microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.zip
Fix multi-parsing issue
Breaks base-url handling, but I'll have to do that better anyway
-rw-r--r-- src/microformats/parser.clj 21
-rw-r--r-- test/microformats/parser_expectations.clj 12
2 files changed, 25 insertions, 8 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index 759f84c..7cb3062 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj
@@ -189,7 +189,7 @@
189 (list {:html (apply str (node-to-html content)) 189 (list {:html (apply str (node-to-html content))
190 :value (apply str (node-to-text content))}))) 190 :value (apply str (node-to-text content))})))
191 191
192(declare continue-walking) 192(declare walk-children)
193 193
194(defn gen-property-parser 194(defn gen-property-parser
195 "Create a property parser" 195 "Create a property parser"
@@ -197,7 +197,7 @@
197 (fn [loc] 197 (fn [loc]
198 (apply (partial merge-with concat) 198 (apply (partial merge-with concat)
199 (f loc) 199 (f loc)
200 (continue-walking loc)))) 200 (walk-children loc))))
201 201
202(def parse-p 202(def parse-p
203 "Parse p-* classes within HTML element." 203 "Parse p-* classes within HTML element."
@@ -311,8 +311,8 @@
311 "Parse h-* classes within a HTML element." 311 "Parse h-* classes within a HTML element."
312 [loc] 312 [loc]
313 (hash-map :type (get-mf-names (z/node loc)) 313 (hash-map :type (get-mf-names (z/node loc))
314 :properties (apply merge (parse-implied loc) 314 :properties (merge (parse-implied loc)
315 (continue-walking loc)))) 315 (apply merge-with concat (walk-children loc)))))
316 316
317(defn parse-mf 317(defn parse-mf
318 "Parse microformats within a HTML element." 318 "Parse microformats within a HTML element."
@@ -343,11 +343,16 @@
343 (map (partial parse-mf loc) (single-pass-child types)) 343 (map (partial parse-mf loc) (single-pass-child types))
344 (recur (z/next loc))))) 344 (recur (z/next loc)))))
345 345
346(defn continue-walking 346(def map-walk
347 "Keep walking that tree" 347 (comp (r/map (partial apply merge))
348 (r/filter identity)
349 (r/map walk)
350 (r/map z/xml-zip)))
351
352(defn walk-children
353 "Walk through child elements of loc"
348 [loc] 354 [loc]
349 (when (not (z/end? loc)) 355 (some->> loc z/children map-walk (into [])))
350 (walk (z/next loc))))
351 356
352(defn parse-rel 357(defn parse-rel
353 "Parse rel attributes of an HTML link element" 358 "Parse rel attributes of an HTML link element"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index a60883d..f8c7bfb 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj
@@ -304,3 +304,15 @@
304 (parse "<html><head><base href=\"http://example.com/\"></head><body> 304 (parse "<html><head><base href=\"http://example.com/\"></head><body>
305<div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div> 305<div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div>
306</body></html>")) 306</body></html>"))
307
308(expect {:items '({:type ("h-card"),
309 :properties {:photo ("http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg"),
310 :url ("http://blog.lizardwrangler.com/" "https://twitter.com/MitchellBaker"),
311 :name ("Mitchell Baker"),
312 }})
313 :rels {}}
314 (parse "<div class=\"h-card\">
315 <img class=\"u-photo\" src=\"http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg\"/>
316 <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a>
317 <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a>
318 </div>"))