From 385ef0db111ad842be4817171f9a19276d312fb5 Mon Sep 17 00:00:00 2001 From: Alan Pearce Date: Mon, 22 Sep 2014 20:36:26 +0100 Subject: Trim extra spaces from HTML content --- src/microformats/parser.clj | 9 +++++++++ test/microformats/parser_expectations.clj | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index eade3dc..cd57693 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj @@ -101,6 +101,15 @@ % (apply str (persistent! (html/emit-tag % (transient []))))) el)) +(defn- node-to-text + "Turn a node into a text string" + [content] + (->> content + html/texts + (map #(str/replace % #"\s+" " ")) + (apply str) + str/trim)) + (defn get-e-value "Get the e-x propery value of an element" [el] diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index 05cc052..93a21b0 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj @@ -16,6 +16,14 @@ (expect "Foo bar" (apply str (#'microformats.parser/node-to-html '("Foo " {:tag :strong, :attrs nil, :content ("bar")})))) +(expect "665 3rd St. Suite 207" + (->> "

+665 3rd St. +Suite 207 +

" + snippet :content + (#'microformats.parser/node-to-text))) + (expect {:name '("Name")} (parse-p (snippet "

Name

"))) -- cgit 1.4.1