about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAlan Pearce2014-09-22 20:36:26 +0100
committerAlan Pearce2014-09-22 20:36:26 +0100
commit385ef0db111ad842be4817171f9a19276d312fb5 (patch)
tree63b3942303395035c53b8a0021cb9b56a0bed727
parent0fe95fe03334c62c2c36f0e3a1d2be167fc4a506 (diff)
downloadmicroformats-385ef0db111ad842be4817171f9a19276d312fb5.tar.lz
microformats-385ef0db111ad842be4817171f9a19276d312fb5.tar.zst
microformats-385ef0db111ad842be4817171f9a19276d312fb5.zip
Trim extra spaces from HTML content
-rw-r--r--src/microformats/parser.clj9
-rw-r--r--test/microformats/parser_expectations.clj8
2 files changed, 17 insertions, 0 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index eade3dc..cd57693 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -101,6 +101,15 @@
           %
           (apply str (persistent! (html/emit-tag % (transient []))))) el))
 
+(defn- node-to-text
+  "Flatten `content` into one string: each whitespace run becomes a single space, then the ends are trimmed."
+  [content]
+  (->> content
+       html/texts                          ; text of each node -- presumably enlive's `texts`; confirm the `html` alias
+       (map #(str/replace % #"\s+" " "))   ; per string: any run of whitespace (incl. newlines) -> one space
+       (apply str)                         ; concatenate the pieces into a single string
+       str/trim))                          ; strip leading/trailing whitespace (assumes `str` is clojure.string)
+
 (defn get-e-value
   "Get the e-x propery value of an element"
   [el]
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj
index 05cc052..93a21b0 100644
--- a/test/microformats/parser_expectations.clj
+++ b/test/microformats/parser_expectations.clj
@@ -16,6 +16,14 @@
 (expect "Foo <strong>bar</strong>"
         (apply str (#'microformats.parser/node-to-html '("Foo " {:tag :strong, :attrs nil, :content ("bar")}))))
 
+(expect "665 3rd St. Suite 207" ; newlines around the spans collapse to single spaces; none remain at the ends
+        (->> "<p class=\"h-adr\">
+<span class=\"p-street-address\">665 3rd St.</span>
+<span class=\"p-extended-address\">Suite 207</span>
+</p>"
+             snippet :content ; `snippet` presumably parses the markup (defined elsewhere); then take the node's :content
+             (#'microformats.parser/node-to-text))) ; var-quote reaches the private (defn-) function
+
 (expect {:name '("Name")}
         (parse-p (snippet "<p class=\"p-name\"><span class=\"value\">Name</span></p>")))