all repos — archive/microformats @ 8899c5515aa3cad313250c82cacc895f8cd89e2f

Incomplete Clojure microformats library

Fix multi-parsing issue. Breaks base-url handling, but I'll have to do that better anyway.

Alan Pearce
commit

8899c5515aa3cad313250c82cacc895f8cd89e2f

parent

dc1b88d86c0feffd15489a3f51681a9942d107c0

2 files changed, 25 insertions(+), 8 deletions(-)

jump to
M src/microformats/parser.clj
@@ -189,7 +189,7 @@ content (:content el)]
(list {:html (apply str (node-to-html content)) :value (apply str (node-to-text content))}))) -(declare continue-walking) +(declare walk-children) (defn gen-property-parser "Create a property parser"
@@ -197,7 +197,7 @@ [f]
(fn [loc] (apply (partial merge-with concat) (f loc) - (continue-walking loc)))) + (walk-children loc)))) (def parse-p "Parse p-* classes within HTML element."
@@ -311,8 +311,8 @@ (defn parse-h
"Parse h-* classes within a HTML element." [loc] (hash-map :type (get-mf-names (z/node loc)) - :properties (apply merge (parse-implied loc) - (continue-walking loc)))) + :properties (merge (parse-implied loc) + (apply merge-with concat (walk-children loc))))) (defn parse-mf "Parse microformats within a HTML element."
@@ -343,11 +343,16 @@ (if-let [types (some->> loc z/node :attrs :class (re-seq #"(?:^|\s)(h|p|u|dt|e)-\w+") (map second) set)]
(map (partial parse-mf loc) (single-pass-child types)) (recur (z/next loc))))) -(defn continue-walking - "Keep walking that tree" +(def map-walk + (comp (r/map (partial apply merge)) + (r/filter identity) + (r/map walk) + (r/map z/xml-zip))) + +(defn walk-children + "Walk through child elements of loc" [loc] - (when (not (z/end? loc)) - (walk (z/next loc)))) + (some->> loc z/children map-walk (into []))) (defn parse-rel "Parse rel attributes of an HTML link element"
M test/microformats/parser_expectations.clj
@@ -304,3 +304,15 @@ :photo ("http://example.com/me.png")}}) :rels {}}
(parse "<html><head><base href=\"http://example.com/\"></head><body> <div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div> </body></html>")) + +(expect {:items '({:type ("h-card"), + :properties {:photo ("http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg"), + :url ("http://blog.lizardwrangler.com/" "https://twitter.com/MitchellBaker"), + :name ("Mitchell Baker"), + }}) + :rels {}} + (parse "<div class=\"h-card\"> + <img class=\"u-photo\" src=\"http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg\"/> + <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a> + <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a> + </div>"))