diff options
author | Alan Pearce | 2014-09-27 13:19:28 +0100 |
---|---|---|
committer | Alan Pearce | 2014-09-27 13:19:28 +0100 |
commit | b702d4ca7f3eb140a5436469e3bc05d997b1bb26 (patch) | |
tree | 40b44d1212d75311c8b8eb3dce61bf856dce2bc9 | |
parent | 652b21ab446448e52a0a1a2e8a43ff98968a549f (diff) | |
download | microformats-b702d4ca7f3eb140a5436469e3bc05d997b1bb26.tar.lz microformats-b702d4ca7f3eb140a5436469e3bc05d997b1bb26.tar.zst microformats-b702d4ca7f3eb140a5436469e3bc05d997b1bb26.zip |
Only select the first level of h- elements
-rw-r--r-- | src/microformats/parser.clj | 12 | ||||
-rw-r--r-- | test/microformats/parser_expectations.clj | 21 |
2 files changed, 31 insertions, 2 deletions
```diff
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 36eba82..8bc7c3c 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -1,5 +1,6 @@
 (ns microformats.parser
   (:require [net.cgrand.enlive-html :as html]
+            [clojure.zip :as z]
             [clojure.core.reducers :as r]
             [clojure.string :as str]))
 
@@ -250,10 +251,17 @@
   (hash-map :type (get-mf-names element)
             :properties (get-mf-properties element)))
 
+;; Adapted from http://stackoverflow.com/a/7686324
+(defn- parents
+  [pred]
+  (html/zip-pred (fn [loc]
+                   (some pred (take-while identity (iterate z/up (z/up loc)))))))
+
 (defn select-h
-  "Select h-* elements within a HTML document."
+  "Select top-level h-* elements within a HTML element."
   [element]
-  (html/select element [(html/attr-contains :class "h-")]))
+  (html/select element [[(html/attr-contains :class "h-")
+                         (html/but (parents (html/attr? :class)))]]))
 
 (defn parse-rel
   "Parse rel attributes of an HTML link element"
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index 1783af5..455a26f 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -24,6 +24,27 @@
             snippet
             :content
             (#'microformats.parser/node-to-text)))
 
+(expect '({:tag :div :attrs {:class "h-card"}
+           :content nil})
+        (select-h (snippet
+                   "<div class=\"h-card\"></div>")))
+
+(expect '({:tag :div :attrs {:class "h-card"}
+           :content ({:tag :a :attrs {:class "h-org"}
+                      :content nil})})
+        (select-h (snippet
+                   "<div class=\"h-card\"><a class=\"h-org\"></a></div>")))
+
+(expect '({:tag :div :attrs {:class "h-card"}
+           :content ("\n"
+                     {:tag :p :attrs nil
+                      :content ({:tag :a :attrs {:class "h-org"}
+                                 :content nil})}
+                     "\n")})
+        (select-h (snippet "<div class=\"h-card\">
+<p><a class=\"h-org\"></a></p>
+</div>")))
+
 (expect {:name '("Name")}
         (parse-p (snippet "<p class=\"p-name\"><span class=\"value\">Name</span></p>")))
```