diff options
author | Alan Pearce | 2014-09-11 17:46:50 +0100 |
---|---|---|
committer | Alan Pearce | 2014-09-11 17:46:50 +0100 |
commit | 8bfc60aaf991201d3971a2e7d248f729bf2949ae (patch) | |
tree | 393eb073bba2fdb094211b8b018cd3e12612c88e | |
parent | 94aa44b7d5a33ca2a0efe0a72a582d24d65c0657 (diff) | |
download | microformats-8bfc60aaf991201d3971a2e7d248f729bf2949ae.tar.lz microformats-8bfc60aaf991201d3971a2e7d248f729bf2949ae.tar.zst microformats-8bfc60aaf991201d3971a2e7d248f729bf2949ae.zip |
Use class name for property keywords
-rw-r--r-- | src/microformats/parser.clj | 37 | ||||
-rw-r--r-- | test/microformats/parser_test.clj | 9 |
2 files changed, 42 insertions, 4 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index 4071e85..c39e396 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj @@ -1,12 +1,45 @@ (ns microformats.parser - (:require [net.cgrand.enlive-html :as html])) + (:require [net.cgrand.enlive-html :as html] + [clojure.core.reducers :as r] + [clojure.string :as str])) ;;; Turn string into stream +(defn mf-names-from-class + "Get microformat classnames from a class attribute" + [prefix] + (r/filter #(.startsWith % prefix))) + +(defn remove-mf-prefix + "Remove microformats prefixes from a class attribute" + [prefix] + (r/map #(apply str (drop (count prefix) %)))) + +(defn- split-classes + "Split a whitespace-separated string." + [class] + (str/split class #"\s+")) + +(defn classes-to-props + "Convert class list to list of microformat property keywords" + [prefix] + (comp (r/map keyword) + (remove-mf-prefix prefix) + (mf-names-from-class prefix))) + +(defn element-to-classes + "Get list of classes from an element" + [el] (-> el + :attrs + :class + split-classes)) + (defn parse-p "Parse p-* classes within HTML element." [element] - (hash-map :properties (hash-map :name (first (:content (first (html/select element [(html/attr-starts :class "p-")]))))))) + (let [el (first (html/select element [(html/attr-starts :class "p-")])) + props (into [] ((classes-to-props "p-") (element-to-classes el)))] + (hash-map :properties (apply hash-map (first props) (repeat (count props) (first (:content el))))))) (defn parse-h "Parse h-* classes within a HTML document." diff --git a/test/microformats/parser_test.clj b/test/microformats/parser_test.clj index 9720ad2..f37906e 100644 --- a/test/microformats/parser_test.clj +++ b/test/microformats/parser_test.clj @@ -9,5 +9,10 @@ (deftest parse-p-inner-text (testing "Inner text of a p- property should be parsed") - (is (= {:items [{:properties {:name "Example User"}}] :rels {}} - (parse "<div class=\"h-card\"><p class=\"p-name\">Example User</p></div>")))) + (are [ex in] (= ex (parse in)) + {:items [{:properties {:name "Example User"}}] :rels {}} + "<div class=\"h-card\"><p class=\"p-name\">Example User</p></div>" + + {:items [{:properties {:nickname "exuser"}}] :rels {}} + "<div class=\"h-card\"><p class=\"p-nickname\">exuser</p></div>" + )) |