about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2014-09-11 17:46:50 +0100
committer Alan Pearce 2014-09-11 17:46:50 +0100
commit 8bfc60aaf991201d3971a2e7d248f729bf2949ae (patch)
tree 393eb073bba2fdb094211b8b018cd3e12612c88e
parent 94aa44b7d5a33ca2a0efe0a72a582d24d65c0657 (diff)
download microformats-8bfc60aaf991201d3971a2e7d248f729bf2949ae.tar.lz
microformats-8bfc60aaf991201d3971a2e7d248f729bf2949ae.tar.zst
microformats-8bfc60aaf991201d3971a2e7d248f729bf2949ae.zip
Use class name for property keywords
-rw-r--r-- src/microformats/parser.clj 37
-rw-r--r-- test/microformats/parser_test.clj 9
2 files changed, 42 insertions, 4 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 4071e85..c39e396 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -1,12 +1,45 @@
 (ns microformats.parser
-  (:require [net.cgrand.enlive-html :as html]))
+  (:require [net.cgrand.enlive-html :as html]
+            [clojure.core.reducers :as r]
+            [clojure.string :as str]))
 
 ;;; Turn string into stream
 
+(defn mf-names-from-class
+  "Return a function that keeps only the class names starting with `prefix`."
+  [prefix]
+  (r/filter (fn [class-name] (.startsWith class-name prefix))))
+
+(defn remove-mf-prefix
+  "Return a function that drops the first (count prefix) characters of each class name."
+  [prefix]
+  (let [skip (count prefix)] (r/map (fn [class-name] (apply str (drop skip class-name))))))
+
+(defn- split-classes
+  "Split a class attribute on whitespace; nil or blank input yields []."
+  [class]
+  (if (str/blank? class) [] (str/split (str/trim class) #"\s+"))) ; trim avoids a leading \"\" entry
+
+(defn classes-to-props
+  "Return a function mapping a class list to keywords for the `prefix` classes."
+  [prefix]
+  (let [keep-prefixed (mf-names-from-class prefix)
+        strip-prefix  (remove-mf-prefix prefix)]
+    (fn [classes] (r/map keyword (strip-prefix (keep-prefixed classes))))))
+
+(defn element-to-classes
+  "Split the :class entry of an element's :attrs map into class names."
+  [el]
+  (let [attributes (:attrs el)
+        class-attr (:class attributes)]
+    (split-classes class-attr)))
+
 (defn parse-p
   "Parse p-* classes within HTML element."
   [element]
-  (hash-map :properties (hash-map :name (first (:content (first (html/select element [(html/attr-starts :class "p-")])))))))
+  (let [el (first (html/select element [(html/attr-starts :class "p-")])) ; first element whose class starts with p-
+        props (if el (into [] ((classes-to-props "p-") (element-to-classes el))) [])] ; no match -> no properties
+    (hash-map :properties (zipmap props (repeat (first (:content el))))))) ; every p- class maps to the first content node
 
 (defn parse-h
   "Parse h-* classes within a HTML document."
diff --git a/test/microformats/parser_test.clj b/test/microformats/parser_test.clj
index 9720ad2..f37906e 100644
--- a/test/microformats/parser_test.clj
+++ b/test/microformats/parser_test.clj
@@ -9,5 +9,10 @@
 
 (deftest parse-p-inner-text
   (testing "Inner text of a p- property should be parsed")
-  (is (= {:items [{:properties {:name "Example User"}}] :rels {}}
-         (parse "<div class=\"h-card\"><p class=\"p-name\">Example User</p></div>"))))
+  (are [ex in] (= ex (parse in)) ; rows below alternate: expected result, then input HTML
+       {:items [{:properties {:name "Example User"}}] :rels {}}
+       "<div class=\"h-card\"><p class=\"p-name\">Example User</p></div>"
+
+       {:items [{:properties {:nickname "exuser"}}] :rels {}}
+       "<div class=\"h-card\"><p class=\"p-nickname\">exuser</p></div>"
+       )) ; NOTE(review): the `testing` form above is closed on its own line, so these checks run outside it