diff options
author | Alan Pearce | 2014-10-19 17:32:42 +0100 |
---|---|---|
committer | Alan Pearce | 2014-10-19 17:32:42 +0100 |
commit | cfc7b8399308136d62f172a4d522652d0097bb96 (patch) | |
tree | c815fbd25bce28751b7b8e6fe3a37c8436c633f8 | |
parent | e741d80aac6cc88ba7101585924d070a35583e5b (diff) | |
download | microformats-master.tar.lz microformats-master.tar.zst microformats-master.zip |
-rw-r--r-- | src/microformats/parser.clj | 21 | ||||
-rw-r--r-- | test/microformats/parser_expectations.clj | 14 |
2 files changed, 25 insertions, 10 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index e0e2310..f5d18d9 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj @@ -114,20 +114,21 @@ (defn get-value-class "Get the value class of elements" - [elements] - (str/join " " (into [] ((comp (r/map (partial apply str)) - (r/map node-to-text) - (r/map :content)) - elements)))) + [elements ws?] + (str/join (if ws? " " "") + (into [] ((comp (r/map (partial apply str)) + (r/map node-to-text) + (r/map :content)) + elements)))) (defn find-value-class "Find and get the value class of elements" - [el] + [el ws?] (anacond (not-empty (html/select el [html/root :> :.value-title])) (get-value-title-class %) (not-empty (html/select el [html/root :> :.value ])) - (get-value-class %))) + (get-value-class % ws?))) (declare parse-h) @@ -152,7 +153,7 @@ [loc] (let [el (z/node loc)] (or (find-child-mf loc) - (str/trim (or (find-value-class el) + (str/trim (or (find-value-class el true) (case (:tag el) :img (-> el :attrs :alt) :area (-> el :attrs :alt) @@ -167,7 +168,7 @@ "Get the u-x property value of an element" [loc] (let [el (z/node loc)] - (str/trim (or (find-value-class el) + (str/trim (or (find-value-class el nil) (case (:tag el) :a (normalise-url (z/root loc) (-> el :attrs :href)) :area (normalise-url (z/root loc) (-> el :attrs :href)) @@ -181,7 +182,7 @@ "Get the dt-x property value of an element" [loc] (let [el (z/node loc)] - (str/trim (or (find-value-class el) + (str/trim (or (find-value-class el nil) (case (:tag el) :time (-> el :attrs :datetime) :ins (-> el :attrs :datetime) diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index cdd26a9..56d86ab 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj @@ -322,3 +322,17 @@ <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a> <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a> </div>")) + +(expect {:items '({:type ("h-entry"), + :properties {:name ("microformats.org at 7"), + :url ("http://microformats.org/2012/06/25/microformats-org-at-7")}}) + :rels {}} + + (parse "<div class=\"h-entry\"> + <p class=\"p-name\">microformats.org at 7</p> + <p class=\"u-url\"> + <span class=\"value\">http://microformats.org/</span> - + <span class=\"value\">2012/06/25/microformats-org-at-7</span> + </p> + + </div>" "http://example.com/")) |