about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-09-13 20:16:51 +0100
committer Alan Pearce 2014-09-13 20:20:01 +0100
commit 023b48cfad96c261f4abfa64c1092bdb39fba1c1 (patch)
tree d21b5d11ff5961bfa317621e556e0cc2c5921ed2
parent 26fe7047ce98ffac571e4c8665e25b8900830a90 (diff)
download microformats-023b48cfad96c261f4abfa64c1092bdb39fba1c1.tar.lz
microformats-023b48cfad96c261f4abfa64c1092bdb39fba1c1.tar.zst
microformats-023b48cfad96c261f4abfa64c1092bdb39fba1c1.zip
Add e-* parsing
-rw-r--r-- src/microformats/parser.clj 23
-rw-r--r-- test/microformats/parser_test.clj 12
2 files changed, 34 insertions, 1 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 836fdcb..558f784 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -70,6 +70,20 @@
       (first (:content el))
       ""))
 
+(defn- node-to-html
+  "Turn a sequence of nodes into a sequence of HTML strings; string nodes pass through unchanged"
+  [el]
+  (map #(if (string? %)
+          %
+          (apply str (persistent! (html/emit-tag % (transient []))))) el))
+
+(defn get-e-value
+  "Get the e-x property value of an element: a map of :html (child markup) and :value (child text)"
+  [el]
+  (let [content (:content el)]
+    {:html (apply str (node-to-html content))
+     :value (apply str (html/texts content))}))
+
 (defn parse-p
   "Parse p-* classes within HTML element."
   [element]
@@ -88,12 +102,19 @@
   (let [value (get-dt-value element)]
     (into {} (r/map #(hash-map % value) ((classes-to-props "dt-") (element-to-classes element))))))
 
+(defn parse-e
+  "Parse e-* classes within HTML element into a map of property to its {:html, :value} map."
+  [element]
+  (let [value (get-e-value element)]
+    (into {} (r/map #(hash-map % value) ((classes-to-props "e-") (element-to-classes element))))))
+
 (defn parse-children
   "Parse element children for microformats"
   [element]
   (let [el (first (html/select element [(html/union [(html/attr-starts :class "p-")
                                                      (html/attr-starts :class "u-")
-                                                     (html/attr-starts :class "dt-")])]))]
+                                                     (html/attr-starts :class "dt-")
+                                                     (html/attr-starts :class "e-")])]))]
     (hash-map :properties (merge (parse-p el) (parse-u el)))))
 
 (defn parse-h
diff --git a/test/microformats/parser_test.clj b/test/microformats/parser_test.clj
index 504ac24..7d837f2 100644
--- a/test/microformats/parser_test.clj
+++ b/test/microformats/parser_test.clj
@@ -17,6 +17,12 @@
          [:location :name]
          ["someclass" "p-location" "someotherclass" "p-name"])))
 
+(deftest node-to-html-string
+  (testing "`node-to-html' should return a string of HTML content"
+    (are [ex in] (= ex (apply str (#'microformats.parser/node-to-html in))) ; #' reaches the private (defn-) var
+         "Foo <strong>bar</strong>"
+         '("Foo " {:tag :strong, :attrs nil, :content ("bar")}))))
+
 (deftest parse-p-inner-text
   (testing "Inner text of a p- property should be parsed")
   (are [ex in] (= ex (parse-p (first (html-snippet in))))
@@ -109,3 +115,9 @@
 
          {:start "2012-08-05T14:50"}
          "<input class=\"dt-start\" value=\"2012-08-05T14:50\">")))
+
+(deftest parse-e-elements
+  (testing "Tags with e-* classes should have their content parsed"
+    (are [ex in] (= ex (parse-e (first (html-snippet in))))
+         {:content {:html "Here is a load of <strong>embedded markup</strong>" :value "Here is a load of embedded markup"}}
+         "<div class=\"e-content\">Here is a load of <strong>embedded markup</strong></div>")))