From 590b586073457434ade3d6ebbfc6f2ca72cc8806 Mon Sep 17 00:00:00 2001
From: Alan Pearce
Date: Sun, 12 Oct 2014 00:52:36 +0100
Subject: Ensure child microformats are only parsed once

---
 src/microformats/parser.clj | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 92104dc..759f84c 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -2,6 +2,7 @@
   (:require [net.cgrand.enlive-html :as html]
             [clojure.zip :as z]
             [clojure.core.reducers :as r]
+            [clojure.set :as set]
             [clojure.string :as str]
             [clojurewerkz.urly.core :as url]))
 
@@ -323,13 +324,23 @@
     "dt" (parse-dt loc)
     "e" (parse-e loc)))
 
+(defn has-child?
+  [types] (set/subset? #{"p" "h"} types))
+
+(defn single-pass-child
+  "Ensure a child microformat of a property is only parsed as a child"
+  [types]
+  (if (has-child? types)
+    (remove #(= "h" %) types)
+    types))
+
 (defn walk
   "Walk HTML element tree for microformat properties."
   [loc]
   (when (and (not (z/end? loc))
              (not (contains? #{:br :hr} (-> loc z/node :tag))))
-    (map (partial parse-mf loc) class-groups)
     (if-let [types (some->> loc z/node :attrs :class (re-seq #"(?:^|\s)(h|p|u|dt|e)-\w+") (map second) set)]
+      (map (partial parse-mf loc) (single-pass-child types))
       (recur (z/next loc)))))
 
 (defn continue-walking
-- 
cgit 1.4.1