about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-10-19 10:06:28 +0100
committer Alan Pearce 2014-10-19 10:06:28 +0100
commit 8899c5515aa3cad313250c82cacc895f8cd89e2f (patch)
tree 40252091923b602a1cc3974c6ed38105feb18181
parent dc1b88d86c0feffd15489a3f51681a9942d107c0 (diff)
download microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.lz
microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.tar.zst
microformats-8899c5515aa3cad313250c82cacc895f8cd89e2f.zip
Fix multi-parsing issue
Breaks base-url handling, but I'll have to do that better anyway
-rw-r--r-- src/microformats/parser.clj 21
-rw-r--r-- test/microformats/parser_expectations.clj 12
2 files changed, 25 insertions, 8 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 759f84c..7cb3062 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -189,7 +189,7 @@
     (list {:html (apply str (node-to-html content))
            :value (apply str (node-to-text content))})))
 
-(declare continue-walking)
+(declare walk-children)
 
 (defn gen-property-parser
   "Create a property parser"
@@ -197,7 +197,7 @@
   (fn [loc]
     (apply (partial merge-with concat)
            (f loc)
-           (continue-walking loc))))
+           (walk-children loc))))
 
 (def parse-p
   "Parse p-* classes within HTML element."
@@ -311,8 +311,8 @@
   "Parse h-* classes within a HTML element."
   [loc]
   (hash-map :type (get-mf-names (z/node loc))
-            :properties (apply merge (parse-implied loc)
-                               (continue-walking loc))))
+            :properties (merge (parse-implied loc)
+                               (apply merge-with concat (walk-children loc)))))
 
 (defn parse-mf
   "Parse microformats within a HTML element."
@@ -343,11 +343,16 @@
       (map (partial parse-mf loc) (single-pass-child types))
       (recur (z/next loc)))))
 
-(defn continue-walking
-  "Keep walking that tree"
+(def map-walk
+  (comp (r/map (partial apply merge))
+        (r/filter identity)
+        (r/map walk)
+        (r/map z/xml-zip)))
+
+(defn walk-children
+  "Walk through child elements of loc"
   [loc]
-  (when (not (z/end? loc))
-    (walk (z/next loc))))
+  (some->> loc z/children map-walk (into [])))
 
 (defn parse-rel
   "Parse rel attributes of an HTML link element"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index a60883d..f8c7bfb 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -304,3 +304,15 @@
         (parse "<html><head><base href=\"http://example.com/\"></head><body>
 <div class=\"h-card\"><img class=\"u-photo\" alt=\"Example User\" src=\"me.png\"></div>
 </body></html>"))
+
+(expect {:items '({:type ("h-card"),
+                   :properties {:photo ("http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg"),
+                                :url ("http://blog.lizardwrangler.com/" "https://twitter.com/MitchellBaker"),
+                                :name ("Mitchell Baker"),
+                                }})
+                 :rels {}}
+        (parse "<div class=\"h-card\">
+        <img class=\"u-photo\" src=\"http://blog.mozilla.org/press/files/2012/04/mitchell-baker.jpg\"/>
+        <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a>
+        <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a>
+        </div>"))