From 8899c5515aa3cad313250c82cacc895f8cd89e2f Mon Sep 17 00:00:00 2001 From: Alan Pearce Date: Sun, 19 Oct 2014 10:06:28 +0100 Subject: Fix multi-parsing issue Breaks base-url handling, but I'll have to do that better anyway --- src/microformats/parser.clj | 21 +++++++++++++-------- test/microformats/parser_expectations.clj | 12 ++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index 759f84c..7cb3062 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj @@ -189,7 +189,7 @@ (list {:html (apply str (node-to-html content)) :value (apply str (node-to-text content))}))) -(declare continue-walking) +(declare walk-children) (defn gen-property-parser "Create a property parser" @@ -197,7 +197,7 @@ (fn [loc] (apply (partial merge-with concat) (f loc) - (continue-walking loc)))) + (walk-children loc)))) (def parse-p "Parse p-* classes within HTML element." @@ -311,8 +311,8 @@ "Parse h-* classes within a HTML element." [loc] (hash-map :type (get-mf-names (z/node loc)) - :properties (apply merge (parse-implied loc) - (continue-walking loc)))) + :properties (merge (parse-implied loc) + (apply merge-with concat (walk-children loc))))) (defn parse-mf "Parse microformats within a HTML element." @@ -343,11 +343,16 @@ (map (partial parse-mf loc) (single-pass-child types)) (recur (z/next loc))))) -(defn continue-walking - "Keep walking that tree" +(def map-walk + (comp (r/map (partial apply merge)) + (r/filter identity) + (r/map walk) + (r/map z/xml-zip))) + +(defn walk-children + "Walk through child elements of loc" [loc] - (when (not (z/end? loc)) - (walk (z/next loc)))) + (some->> loc z/children map-walk (into []))) (defn parse-rel "Parse rel attributes of an HTML link element" diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index a60883d..f8c7bfb 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj @@ -304,3 +304,15 @@ (parse "