all repos — archive/microformats @ fe2e6c08a4f8d42317fe8e97a62e061209d4ed4d

Incomplete Clojure microformats library

Re-implement base URL parsing. Attach metadata to root nodes and pass it down when walking child nodes.

Alan Pearce
commit

fe2e6c08a4f8d42317fe8e97a62e061209d4ed4d

parent

8899c5515aa3cad313250c82cacc895f8cd89e2f

1 file changed, 20 insertions(+), 6 deletions(-)

jump to
M src/microformats/parser.clj
src/microformats/parser.clj
@@ -81,12 +81,24 @@
(defn get-base-url "Find the base-url of a document." [document] - (or (-> document + (or (some-> document + meta + :base) + (-> document (html/select [:head :> [:base (html/attr? :href)]]) first :attrs :href) "")) + +(defn with-base-url + "Attach the base URL of a document as metadata" + ([document] + (with-base-url (get-base-url document) document)) + ([base-url document] + (if (instance? clojure.lang.IObj document) + (vary-meta document assoc :base base-url) + document))) (defn normalise-url "Normalise a URL"
@@ -132,7 +144,7 @@ (defn- find-child-mf
"Find child property microformats of an element." [loc] (let [element (z/node loc)] - (when (-> element :attrs :class (.indexOf "h-") (>= 0)) + (when (-> element :attrs :class (.indexOf "h-") (>= 0)) (get-child-mf-properties loc)))) (defn get-p-value
@@ -343,16 +355,18 @@ (if-let [types (some->> loc z/node :attrs :class (re-seq #"(?:^|\s)(h|p|u|dt|e)-\w+") (map second) set)]
(map (partial parse-mf loc) (single-pass-child types)) (recur (z/next loc))))) -(def map-walk +(defn map-walk + [root] (comp (r/map (partial apply merge)) (r/filter identity) (r/map walk) - (r/map z/xml-zip))) + (r/map z/xml-zip) + (r/map (partial with-base-url (get-base-url root))))) (defn walk-children "Walk through child elements of loc" [loc] - (some->> loc z/children map-walk (into []))) + (some->> loc z/children ((map-walk (z/root loc))) (into []))) (defn parse-rel "Parse rel attributes of an HTML link element"
@@ -376,5 +390,5 @@
(defn parse "Parse a HTML string with microformats" [html] - (let [document (first (map z/xml-zip (html/html-snippet (str/trim html))))] + (let [document (some->> html str/trim html/html-snippet with-base-url (map z/xml-zip) first)] {:items (some-> document walk) :rels (parse-rels document)}))