diff options
author | Alan Pearce | 2014-10-19 17:32:42 +0100 |
---|---|---|
committer | Alan Pearce | 2014-10-19 17:32:42 +0100 |
commit | cfc7b8399308136d62f172a4d522652d0097bb96 (patch) | |
tree | c815fbd25bce28751b7b8e6fe3a37c8436c633f8 | |
parent | e741d80aac6cc88ba7101585924d070a35583e5b (diff) | |
download | microformats-cfc7b8399308136d62f172a4d522652d0097bb96.tar.lz microformats-cfc7b8399308136d62f172a4d522652d0097bb96.tar.zst microformats-cfc7b8399308136d62f172a4d522652d0097bb96.zip |
-rw-r--r-- | src/microformats/parser.clj | 21 | ||||
-rw-r--r-- | test/microformats/parser_expectations.clj | 14 |
2 files changed, 25 insertions, 10 deletions
diff --git a/src/microformats/parser.clj b/src/microformats/parser.clj index e0e2310..f5d18d9 100644 --- a/src/microformats/parser.clj +++ b/src/microformats/parser.clj | |||
@@ -114,20 +114,21 @@ | |||
114 | 114 | ||
115 | (defn get-value-class | 115 | (defn get-value-class |
116 | "Get the value class of elements" | 116 | "Get the value class of elements" |
117 | [elements] | 117 | [elements ws?] |
118 | (str/join " " (into [] ((comp (r/map (partial apply str)) | 118 | (str/join (if ws? " " "") |
119 | (r/map node-to-text) | 119 | (into [] ((comp (r/map (partial apply str)) |
120 | (r/map :content)) | 120 | (r/map node-to-text) |
121 | elements)))) | 121 | (r/map :content)) |
122 | elements)))) | ||
122 | 123 | ||
123 | (defn find-value-class | 124 | (defn find-value-class |
124 | "Find and get the value class of elements" | 125 | "Find and get the value class of elements" |
125 | [el] | 126 | [el ws?] |
126 | (anacond | 127 | (anacond |
127 | (not-empty (html/select el [html/root :> :.value-title])) | 128 | (not-empty (html/select el [html/root :> :.value-title])) |
128 | (get-value-title-class %) | 129 | (get-value-title-class %) |
129 | (not-empty (html/select el [html/root :> :.value ])) | 130 | (not-empty (html/select el [html/root :> :.value ])) |
130 | (get-value-class %))) | 131 | (get-value-class % ws?))) |
131 | 132 | ||
132 | (declare parse-h) | 133 | (declare parse-h) |
133 | 134 | ||
@@ -152,7 +153,7 @@ | |||
152 | [loc] | 153 | [loc] |
153 | (let [el (z/node loc)] | 154 | (let [el (z/node loc)] |
154 | (or (find-child-mf loc) | 155 | (or (find-child-mf loc) |
155 | (str/trim (or (find-value-class el) | 156 | (str/trim (or (find-value-class el true) |
156 | (case (:tag el) | 157 | (case (:tag el) |
157 | :img (-> el :attrs :alt) | 158 | :img (-> el :attrs :alt) |
158 | :area (-> el :attrs :alt) | 159 | :area (-> el :attrs :alt) |
@@ -167,7 +168,7 @@ | |||
167 | "Get the u-x property value of an element" | 168 | "Get the u-x property value of an element" |
168 | [loc] | 169 | [loc] |
169 | (let [el (z/node loc)] | 170 | (let [el (z/node loc)] |
170 | (str/trim (or (find-value-class el) | 171 | (str/trim (or (find-value-class el nil) |
171 | (case (:tag el) | 172 | (case (:tag el) |
172 | :a (normalise-url (z/root loc) (-> el :attrs :href)) | 173 | :a (normalise-url (z/root loc) (-> el :attrs :href)) |
173 | :area (normalise-url (z/root loc) (-> el :attrs :href)) | 174 | :area (normalise-url (z/root loc) (-> el :attrs :href)) |
@@ -181,7 +182,7 @@ | |||
181 | "Get the dt-x property value of an element" | 182 | "Get the dt-x property value of an element" |
182 | [loc] | 183 | [loc] |
183 | (let [el (z/node loc)] | 184 | (let [el (z/node loc)] |
184 | (str/trim (or (find-value-class el) | 185 | (str/trim (or (find-value-class el nil) |
185 | (case (:tag el) | 186 | (case (:tag el) |
186 | :time (-> el :attrs :datetime) | 187 | :time (-> el :attrs :datetime) |
187 | :ins (-> el :attrs :datetime) | 188 | :ins (-> el :attrs :datetime) |
diff --git a/test/microformats/parser_expectations.clj b/test/microformats/parser_expectations.clj index cdd26a9..56d86ab 100644 --- a/test/microformats/parser_expectations.clj +++ b/test/microformats/parser_expectations.clj | |||
@@ -322,3 +322,17 @@ | |||
322 | <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a> | 322 | <a class=\"p-name u-url\" href=\"http://blog.lizardwrangler.com/\">Mitchell Baker</a> |
323 | <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a> | 323 | <a class=\"u-url\" href=\"https://twitter.com/MitchellBaker\">MitchellBaker</a> |
324 | </div>")) | 324 | </div>")) |
325 | |||
326 | (expect {:items '({:type ("h-entry"), | ||
327 | :properties {:name ("microformats.org at 7"), | ||
328 | :url ("http://microformats.org/2012/06/25/microformats-org-at-7")}}) | ||
329 | :rels {}} | ||
330 | |||
331 | (parse "<div class=\"h-entry\"> | ||
332 | <p class=\"p-name\">microformats.org at 7</p> | ||
333 | <p class=\"u-url\"> | ||
334 | <span class=\"value\">http://microformats.org/</span> - | ||
335 | <span class=\"value\">2012/06/25/microformats-org-at-7</span> | ||
336 | </p> | ||
337 | |||
338 | </div>" "http://example.com/")) | ||