about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-09-27 13:19:28 +0100
committer Alan Pearce 2014-09-27 13:19:28 +0100
commit b702d4ca7f3eb140a5436469e3bc05d997b1bb26 (patch)
tree 40b44d1212d75311c8b8eb3dce61bf856dce2bc9
parent 652b21ab446448e52a0a1a2e8a43ff98968a549f (diff)
download microformats-b702d4ca7f3eb140a5436469e3bc05d997b1bb26.tar.lz
microformats-b702d4ca7f3eb140a5436469e3bc05d997b1bb26.tar.zst
microformats-b702d4ca7f3eb140a5436469e3bc05d997b1bb26.zip
Only select the first level of h- elements
-rw-r--r-- src/microformats/parser.clj 12
-rw-r--r-- test/microformats/parser_expectations.clj 21
2 files changed, 31 insertions, 2 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 36eba82..8bc7c3c 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -1,5 +1,6 @@
 (ns microformats.parser
   (:require [net.cgrand.enlive-html :as html]
+            [clojure.zip :as z]
             [clojure.core.reducers :as r]
             [clojure.string :as str]))
 
@@ -250,10 +251,17 @@
   (hash-map :type (get-mf-names element)
             :properties (get-mf-properties element)))
 
+;; Selector step that holds when some ancestor loc (walking z/up from the parent) satisfies pred. Adapted from http://stackoverflow.com/a/7686324
+(defn- parents
+  [pred]
+  (html/zip-pred (fn [loc]
+                   (some pred (take-while identity (iterate z/up (z/up loc)))))))
+
 (defn select-h
-  "Select h-* elements within a HTML document."
+  "Select top-level h-* elements (those with no h-* ancestor) within an HTML element."
   [element]
-  (html/select element [(html/attr-contains :class "h-")]))
+  (html/select element [[(html/attr-contains :class "h-") ; same class test for element and ancestors
+                         (html/but (parents (html/attr-contains :class "h-")))]]))
 
 (defn parse-rel
   "Parse rel attributes of an HTML link element"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index 1783af5..455a26f 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -24,6 +24,27 @@
              snippet :content
              (#'microformats.parser/node-to-text)))
 
+(expect '({:tag :div :attrs {:class "h-card"}
+           :content nil})
+        (select-h (snippet
+                   "<div class=\"h-card\"></div>")))
+
+(expect '({:tag :div :attrs {:class "h-card"}
+           :content ({:tag :a :attrs {:class "h-org"}
+                      :content nil})})
+        (select-h (snippet
+                   "<div class=\"h-card\"><a class=\"h-org\"></a></div>")))
+
+(expect '({:tag :div :attrs {:class "h-card"}
+           :content ("\n"
+                     {:tag :p :attrs nil
+                      :content ({:tag :a :attrs {:class "h-org"}
+                                 :content nil})}
+                     "\n")})
+        (select-h (snippet "<div class=\"h-card\">
+<p><a class=\"h-org\"></a></p>
+</div>")))
+
 (expect {:name '("Name")}
         (parse-p (snippet "<p class=\"p-name\"><span class=\"value\">Name</span></p>")))