diff options
author | Alan Pearce | 2014-09-22 21:00:33 +0100 |
---|---|---|
committer | Alan Pearce | 2014-09-22 21:00:33 +0100 |
commit | 13937a5afe64e093072b90f4fea2b99f43bf05ed (patch) | |
tree | e63847a699fb17b86c1b63e7ccd43f7b295c69a8 | |
parent | 385ef0db111ad842be4817171f9a19276d312fb5 (diff) | |
download | microformats-13937a5afe64e093072b90f4fea2b99f43bf05ed.tar.lz microformats-13937a5afe64e093072b90f4fea2b99f43bf05ed.tar.zst microformats-13937a5afe64e093072b90f4fea2b99f43bf05ed.zip |
Use node-to-text when getting element content
-rw-r--r-- | src/microformats/parser.clj | 44 |
1 files changed, 22 insertions, 22 deletions
```diff
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index cd57693..8e325e3 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -39,11 +39,27 @@
                          :rel split-ws-attribute))
 
 
+(defn- node-to-html
+  "Turn a node into a list of HTML strings"
+  [el]
+  (map #(if (string? %)
+          %
+          (apply str (persistent! (html/emit-tag % (transient []))))) el))
+
+(defn- node-to-text
+  "Turn a node into a text string"
+  [content]
+  (->> content
+       html/texts
+       (map #(str/replace % #"\s+" " "))
+       (apply str)
+       str/trim))
+
 (defn get-value-class
   "Get the value class of elements"
   [elements]
   (str/join " " (into [] ((comp (r/map (partial apply str))
-                                (r/map html/texts)
+                                (r/map node-to-text)
                                 (r/map :content)) elements))))
 
 
@@ -64,7 +80,7 @@
             :data (-> el :attrs :value)
             :input (-> el :attrs :value)
             nil)
-          (first (:content el))
+          (node-to-text (:content el))
           "")))
 
 (defn get-u-value
@@ -77,7 +93,7 @@
             :img (-> el :attrs :src)
             :object (-> el :attrs :data)
             (get-p-value el))
-          (first :content el)
+          (node-to-text (:content el))
           "")))
 
 (defn get-dt-value
@@ -91,31 +107,15 @@
             :abbr (-> el :attrs :title)
             :data (-> el :attrs :value)
             :input (-> el :attrs :value))
-          (first (:content el))
+          (node-to-text (:content el))
           "")))
 
-(defn- node-to-html
-  "Turn a node into a list of HTML strings"
-  [el]
-  (map #(if (string? %)
-          %
-          (apply str (persistent! (html/emit-tag % (transient []))))) el))
-
-(defn- node-to-text
-  "Turn a node into a text string"
-  [content]
-  (->> content
-       html/texts
-       (map #(str/replace % #"\s+" " "))
-       (apply str)
-       str/trim))
-
 (defn get-e-value
   "Get the e-x propery value of an element"
   [el]
   (let [content (:content el)]
     (list {:html (apply str (node-to-html content))
-           :value (apply str (html/texts content))})))
+           :value (apply str (node-to-text content))})))
 
 (defn parse-p
   "Parse p-* classes within HTML element."
@@ -164,7 +164,7 @@
 (defn- imply-name
   "Imply the name of an entity from the element"
   [element]
-  (:content element))
+  (list (node-to-text (:content element))))
 
 (defn parse-implied
   "Parse implied properties of a HTML element"
```