diff options
author | Alan Pearce | 2014-10-19 10:06:28 +0100 |
---|---|---|
committer | Alan Pearce | 2014-10-19 10:06:28 +0100 |
commit | 8899c5515aa3cad313250c82cacc895f8cd89e2f (patch) | |
tree | 40252091923b602a1cc3974c6ed38105feb18181 | |
parent | dc1b88d86c0feffd15489a3f51681a9942d107c0 (diff) | |
download | microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.lz microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.zst microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.zip |
Fix multi-parsing issue
Breaks base-url handling, but I'll have to do that better anyway
-rw-r--r-- | src/microformats/parser.clj | 21 | ||||
-rw-r--r-- | test/microformats/parser_expectations.clj | 12 |
2 files changed, 25 insertions, 8 deletions
```diff
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 759f84c..7cb3062 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -189,7 +189,7 @@
     (list {:html (apply str (node-to-html content))
            :value (apply str (node-to-text content))})))
 
-(declare continue-walking)
+(declare walk-children)
 
 (defn gen-property-parser
   "Create a property parser"
@@ -197,7 +197,7 @@
   (fn [loc]
     (apply (partial merge-with concat)
            (f loc)
-           (continue-walking loc))))
+           (walk-children loc))))
 
 (def parse-p
   "Parse p-* classes within HTML element."
@@ -311,8 +311,8 @@
   "Parse h-* classes within a HTML element."
   [loc]
   (hash-map :type (get-mf-names (z/node loc))
-            :properties (apply merge (parse-implied loc)
-                               (continue-walking loc))))
+            :properties (merge (parse-implied loc)
+                               (apply merge-with concat (walk-children loc)))))
 
 (defn parse-mf
   "Parse microformats within a HTML element."
@@ -343,11 +343,16 @@
         (map (partial parse-mf loc) (single-pass-child types))
         (recur (z/next loc)))))
 
-(defn continue-walking
-  "Keep walking that tree"
+(def map-walk
+  (comp (r/map (partial apply merge))
+        (r/filter identity)
+        (r/map walk)
+        (r/map z/xml-zip)))
+
+(defn walk-children
+  "Walk through child elements of loc"
   [loc]
-  (when (not (z/end? loc))
-    (walk (z/next loc))))
+  (some->> loc z/children map-walk (into [])))
 
 (defn parse-rel
   "Parse rel attributes of an HTML link element"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index a60883d..f8c7bfb 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -304,3 +304,15 @@
         (parse "<html><head><base href=\"http://example.com/\"></head><body>
 <div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div>
 </body></html>"))
+
+(expect {:items '({:type ("h-card"),
+                   :properties {:photo ("http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg"),
+                                :url ("http://blog.lizardwrangler.com/" "https://twitter.com/MitchellBaker"),
+                                :name ("Mitchell Baker"),
+                                }})
+         :rels {}}
+        (parse "<div class=\"h-card\">
+ <img class=\"u-photo\" src=\"http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg\"/>
+ <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a>
+ <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a>
+ </div>"))
```