all repos — archive/microformats @ 590b586073457434ade3d6ebbfc6f2ca72cc8806

Incomplete Clojure microformats library

Ensure child microformats are only parsed once

Alan Pearce
commit

590b586073457434ade3d6ebbfc6f2ca72cc8806

parent

8d3b9dc5f28587d0290ebf6f7c0bedf0866f5b62

1 file changed, 12 insertions(+), 1 deletion(-)

jump to
M src/microformats/parser.clj
@@ -2,6 +2,7 @@ (ns microformats.parser
(:require [net.cgrand.enlive-html :as html] [clojure.zip :as z] [clojure.core.reducers :as r] + [clojure.set :as set] [clojure.string :as str] [clojurewerkz.urly.core :as url]))
@@ -323,13 +324,23 @@ "u" (parse-u loc)
"dt" (parse-dt loc) "e" (parse-e loc))) +(defn has-child? + [types] (set/subset? #{"p" "h"} types)) + +(defn single-pass-child + "Ensure a child microformat of a property is only parsed as a child" + [types] + (if (has-child? types) + (remove #(= "h" %) types) + types)) + (defn walk "Walk HTML element tree for microformat properties." [loc] (when (and (not (z/end? loc)) (not (contains? #{:br :hr} (-> loc z/node :tag)))) - (map (partial parse-mf loc) class-groups) (if-let [types (some->> loc z/node :attrs :class (re-seq #"(?:^|\s)(h|p|u|dt|e)-\w+") (map second) set)] + (map (partial parse-mf loc) (single-pass-child types)) (recur (z/next loc))))) (defn continue-walking