about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-10-12 00:52:36 +0100
committer Alan Pearce 2014-10-12 00:52:36 +0100
commit 590b586073457434ade3d6ebbfc6f2ca72cc8806 (patch)
tree 2ff215902ed05bb5507dd5884ef0c1cee9b515ab
parent 8d3b9dc5f28587d0290ebf6f7c0bedf0866f5b62 (diff)
download microformats-590b586073457434ade3d6ebbfc6f2ca72cc8806.tar.lz
microformats-590b586073457434ade3d6ebbfc6f2ca72cc8806.tar.zst
microformats-590b586073457434ade3d6ebbfc6f2ca72cc8806.zip
Ensure child microformats are only parsed once
-rw-r--r-- src/microformats/parser.clj 13
1 files changed, 12 insertions, 1 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 92104dc..759f84c 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -2,6 +2,7 @@
   (:require [net.cgrand.enlive-html :as html]
             [clojure.zip :as z]
             [clojure.core.reducers :as r]
+            [clojure.set :as set]
             [clojure.string :as str]
             [clojurewerkz.urly.core :as url]))
 
@@ -323,13 +324,23 @@
     "dt" (parse-dt loc)
     "e" (parse-e loc)))
 
+(defn has-child? ; true when `types` (expected to be a set) contains both "p" and "h"
+  [types] (set/subset? #{"p" "h"} types)) ; NOTE(review): only "p" is paired with "h"; "u"/"e"/"dt" plus "h" yield false - confirm intended
+
+(defn single-pass-child
+  "Ensure a child microformat of a property is only parsed as a child"
+  [types]
+  (if (has-child? types) ; both "p" and "h" prefixes are present in types
+    (remove #(= "h" %) types) ; drop "h", keep every other prefix; result is a lazy seq, not a set
+    types)) ; otherwise hand the prefixes back untouched
+
 (defn walk
   "Walk HTML element tree for microformat properties."
   [loc]
   (when (and (not (z/end? loc))
              (not (contains? #{:br :hr} (-> loc z/node :tag))))
-      (map (partial parse-mf loc) class-groups)
     (if-let [types (some->> loc z/node :attrs :class (re-seq #"(?:^|\s)(h|p|u|dt|e)-\w+") (map second) set)]
+      (map (partial parse-mf loc) (single-pass-child types)) ; run parse-mf once per prefix; "h" is filtered out when "p" is also present
       (recur (z/next loc)))))
 
 (defn continue-walking