diff options
author | Alan Pearce | 2014-10-19 10:06:28 +0100 |
---|---|---|
committer | Alan Pearce | 2014-10-19 10:06:28 +0100 |
commit | 8899c5515aa3cad313250c82cacc895f8cd89e2f (patch) | |
tree | 40252091923b602a1cc3974c6ed38105feb18181 | |
parent | dc1b88d86c0feffd15489a3f51681a9942d107c0 (diff) | |
download | microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.lz microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.zst microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.zip |
Fix multi-parsing issue
Breaks base-url handling, but I'll have to do that better anyway
-rw-r--r-- | src/microformats/parser.clj | 21 | ||||
-rw-r--r-- | test/microformats/parser_expectations.clj | 12 |
2 files changed, 25 insertions, 8 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index 759f84c..7cb3062 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj | |||
@@ -189,7 +189,7 @@ | |||
189 | (list {:html (apply str (node-to-html content)) | 189 | (list {:html (apply str (node-to-html content)) |
190 | :value (apply str (node-to-text content))}))) | 190 | :value (apply str (node-to-text content))}))) |
191 | 191 | ||
192 | (declare continue-walking) | 192 | (declare walk-children) |
193 | 193 | ||
194 | (defn gen-property-parser | 194 | (defn gen-property-parser |
195 | "Create a property parser" | 195 | "Create a property parser" |
@@ -197,7 +197,7 @@ | |||
197 | (fn [loc] | 197 | (fn [loc] |
198 | (apply (partial merge-with concat) | 198 | (apply (partial merge-with concat) |
199 | (f loc) | 199 | (f loc) |
200 | (continue-walking loc)))) | 200 | (walk-children loc)))) |
201 | 201 | ||
202 | (def parse-p | 202 | (def parse-p |
203 | "Parse p-* classes within HTML element." | 203 | "Parse p-* classes within HTML element." |
@@ -311,8 +311,8 @@ | |||
311 | "Parse h-* classes within a HTML element." | 311 | "Parse h-* classes within a HTML element." |
312 | [loc] | 312 | [loc] |
313 | (hash-map :type (get-mf-names (z/node loc)) | 313 | (hash-map :type (get-mf-names (z/node loc)) |
314 | :properties (apply merge (parse-implied loc) | 314 | :properties (merge (parse-implied loc) |
315 | (continue-walking loc)))) | 315 | (apply merge-with concat (walk-children loc))))) |
316 | 316 | ||
317 | (defn parse-mf | 317 | (defn parse-mf |
318 | "Parse microformats within a HTML element." | 318 | "Parse microformats within a HTML element." |
@@ -343,11 +343,16 @@ | |||
343 | (map (partial parse-mf loc) (single-pass-child types)) | 343 | (map (partial parse-mf loc) (single-pass-child types)) |
344 | (recur (z/next loc))))) | 344 | (recur (z/next loc))))) |
345 | 345 | ||
346 | (defn continue-walking | 346 | (def map-walk |
347 | "Keep walking that tree" | 347 | (comp (r/map (partial apply merge)) |
348 | (r/filter identity) | ||
349 | (r/map walk) | ||
350 | (r/map z/xml-zip))) | ||
351 | |||
352 | (defn walk-children | ||
353 | "Walk through child elements of loc" | ||
348 | [loc] | 354 | [loc] |
349 | (when (not (z/end? loc)) | 355 | (some->> loc z/children map-walk (into []))) |
350 | (walk (z/next loc)))) | ||
351 | 356 | ||
352 | (defn parse-rel | 357 | (defn parse-rel |
353 | "Parse rel attributes of an HTML link element" | 358 | "Parse rel attributes of an HTML link element" |
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index a60883d..f8c7bfb 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj | |||
@@ -304,3 +304,15 @@ | |||
304 | (parse "<html><head><base href=\"http://example.com/\"></head><body> | 304 | (parse "<html><head><base href=\"http://example.com/\"></head><body> |
305 | <div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div> | 305 | <div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div> |
306 | </body></html>")) | 306 | </body></html>")) |
307 | |||
308 | (expect {:items '({:type ("h-card"), | ||
309 | :properties {:photo ("http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg"), | ||
310 | :url ("http://blog.lizardwrangler.com/" "https://twitter.com/MitchellBaker"), | ||
311 | :name ("Mitchell Baker"), | ||
312 | }}) | ||
313 | :rels {}} | ||
314 | (parse "<div class=\"h-card\"> | ||
315 | <img class=\"u-photo\" src=\"http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg\"/> | ||
316 | <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a> | ||
317 | <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a> | ||
318 | </div>")) | ||