Ensure child microformats are only parsed once
1 file changed, 12 insertions(+), 1 deletion(-)
jump to
M src/microformats/parser.clj → src/microformats/parser.clj
@@ -2,6 +2,7 @@ (ns microformats.parser (:require [net.cgrand.enlive-html :as html] [clojure.zip :as z] [clojure.core.reducers :as r] + [clojure.set :as set] [clojure.string :as str] [clojurewerkz.urly.core :as url]))@@ -323,13 +324,23 @@ "u" (parse-u loc) "dt" (parse-dt loc) "e" (parse-e loc))) +(defn has-child? "True when the set `types` contains both \"p\" and \"h\"." + [types] (set/subset? #{"p" "h"} types)) + +(defn single-pass-child + "Ensure a child microformat of a property is only parsed as a child. When `types` contains both \"p\" and \"h\", returns a lazy seq of `types` with \"h\" removed; otherwise returns `types` unchanged." + [types] + (if (has-child? types) + (remove #(= "h" %) types) + types)) + (defn walk "Walk HTML element tree for microformat properties. Returns nil when `loc` is at the end of the zipper or its node is a :br or :hr element." [loc] (when (and (not (z/end? loc)) (not (contains? #{:br :hr} (-> loc z/node :tag)))) - (map (partial parse-mf loc) class-groups) (if-let [types (some->> loc z/node :attrs :class (re-seq #"(?:^|\s)(h|p|u|dt|e)-\w+") (map second) set)] + (map (partial parse-mf loc) (single-pass-child types)) (recur (z/next loc))))) (defn continue-walking