diff options
author | Alan Pearce | 2014-09-13 09:07:35 +0100 |
---|---|---|
committer | Alan Pearce | 2014-09-13 09:07:35 +0100 |
commit | 08f9c2b929f1a19e39a1cda2743da3837f3cc132 (patch) | |
tree | f13ddb4f0589066de6e3210b8ef015f545d34127 | |
parent | 672e6221dfd065bba69d097806f10ec7344b4fb9 (diff) | |
download | microformats-08f9c2b929f1a19e39a1cda2743da3837f3cc132.tar.lz microformats-08f9c2b929f1a19e39a1cda2743da3837f3cc132.tar.zst microformats-08f9c2b929f1a19e39a1cda2743da3837f3cc132.zip |
Add u-* parsing capability
-rw-r--r-- | src/microformats/parser.clj | 32 | ||||
-rw-r--r-- | test/microformats/parser_test.clj | 21 |
2 files changed, 48 insertions, 5 deletions
```diff
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj
index 94e8381..a083ea0 100644
--- a/src/microformats/parser.clj
+++ b/src/microformats/parser.clj
@@ -34,7 +34,7 @@
       :class
       split-classes))
 
-(defn get-property
+(defn get-p-property
   "Get the p-x property value of an element"
   [el]
   (case (:tag el)
@@ -45,17 +45,39 @@
     :input (-> el :attrs :value)
     (or (first (:content el)) "")))
 
+(defn get-u-property
+  "Get the u-x property value of an element"
+  [el]
+  (case (:tag el)
+    :a (-> el :attrs :href)
+    :area (-> el :attrs :href)
+    :img (-> el :attrs :src)
+    :object (-> el :attrs :data)
+    (get-p-property el)))
+
 (defn parse-p
   "Parse p-* classes within HTML element."
   [element]
-  (let [el (first (html/select element [(html/attr-starts :class "p-")]))
-        props (into [] ((classes-to-props "p-") (element-to-classes el)))]
-    (hash-map :properties (apply hash-map (first props) (repeat (count props) (get-property el))))))
+  (let [prop (get-p-property element)]
+    (into {} (r/map #(hash-map % prop) ((classes-to-props "p-") (element-to-classes element))))))
+
+(defn parse-u
+  "Parse u-* classes within HTML element"
+  [element]
+  (let [prop (get-u-property element)]
+    (into {} (r/map #(hash-map % prop) ((classes-to-props "u-") (element-to-classes element))))))
+
+(defn parse-children
+  "Parse element children for microformats"
+  [element]
+  (let [el (first (html/select element [(html/union [(html/attr-starts :class "p-")
+                                                     (html/attr-starts :class "u-")])]))]
+    (hash-map :properties (merge (parse-p el) (parse-u el)))))
 
 (defn parse-h
   "Parse h-* classes within a HTML document."
   [html]
-  (mapv parse-p (html/select html [(html/attr-starts :class "h-")])))
+  (mapv parse-children (html/select html [(html/attr-starts :class "h-")])))
 
 (defn parse
   "Parse a HTML string with microformats"
diff --git a/test/microformats/parser_test.clj b/test/microformats/parser_test.clj
index a28261e..26cb094 100644
--- a/test/microformats/parser_test.clj
+++ b/test/microformats/parser_test.clj
@@ -36,3 +36,24 @@
 
          {:items [{:properties {:name ""}}] :rels {}}
          "<div class=\"h-card\"><hr class=\"p-name\"/></div>")))
+
+(deftest parse-u-elements
+  (testing "Tags should have their values parsed as a u-* value"
+    (are [ex in] (= ex (parse in))
+         {:items [{:properties {:url "http://example.com"}}] :rels {}}
+         "<div class=\"h-card\"><a class=\"u-url\" href=\"http://example.com\">Awesome example website</a></div>"
+
+         {:items [{:properties {:photo "http://example.com/someimage.png"}}] :rels {}}
+         "<div class=\"h-card\"><img class=\"u-photo\" src=\"http://example.com/someimage.png\"></div>"
+
+         {:items [{:properties {:photo "http://example.com/someimage.png"}}] :rels {}}
+         "<map class=\"h-card\"><area class=\"u-photo\" href=\"http://example.com/someimage.png\"></area></map>"
+
+         {:items [{:properties {:photo "http://example.com/someimage.png"}}] :rels {}}
+         "<div class=\"h-card\"><object class=\"u-photo\" data=\"http://example.com/someimage.png\"></object></div>"
+
+         {:items [{:properties {:photo "http://example.com/someimage.png"}}] :rels {}}
+         "<div class=\"h-card\"><abbr class=\"u-photo\" title=\"http://example.com/someimage.png\"></abbr></div>"
+
+         {:items [{:properties {:photo "http://example.com/someimage.png"}}] :rels {}}
+         "<div class=\"h-card\"><data class=\"u-photo\" value=\"http://example.com/someimage.png\"></data></div>")))
```