From b702d4ca7f3eb140a5436469e3bc05d997b1bb26 Mon Sep 17 00:00:00 2001 From: Alan Pearce Date: Sat, 27 Sep 2014 13:19:28 +0100 Subject: Only select the first level of h- elements --- src/microformats/parser.clj | 12 ++++++++++-- test/microformats/parser_expectations.clj | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index 36eba82..8bc7c3c 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj @@ -1,5 +1,6 @@ (ns microformats.parser (:require [net.cgrand.enlive-html :as html] + [clojure.zip :as z] [clojure.core.reducers :as r] [clojure.string :as str])) @@ -250,10 +251,17 @@ (hash-map :type (get-mf-names element) :properties (get-mf-properties element))) +;; Adapted from http://stackoverflow.com/a/7686324 +(defn- parents + [pred] + (html/zip-pred (fn [loc] + (some pred (take-while identity (iterate z/up (z/up loc))))))) + (defn select-h - "Select h-* elements within a HTML document." + "Select top-level h-* elements within an HTML element, i.e. those with no h-* ancestor." [element] - (html/select element [(html/attr-contains :class "h-")])) + (html/select element [[(html/attr-contains :class "h-") + (html/but (parents (html/attr-contains :class "h-")))]])) (defn parse-rel "Parse rel attributes of an HTML link element" diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index 1783af5..455a26f 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj @@ -24,6 +24,27 @@ snippet :content (#'microformats.parser/node-to-text))) +(expect '({:tag :div :attrs {:class "h-card"} + :content nil}) + (select-h (snippet + "
"))) + +(expect '({:tag :div :attrs {:class "h-card"} + :content ({:tag :a :attrs {:class "h-org"} + :content nil})}) + (select-h (snippet + ""))) + +(expect '({:tag :div :attrs {:class "h-card"} + :content ("\n" + {:tag :p :attrs nil + :content ({:tag :a :attrs {:class "h-org"} + :content nil})} + "\n")}) + (select-h (snippet ""))) + (expect {:name '("Name")} (parse-p (snippet "Name
"))) -- cgit 1.4.1