Reimplemented TOC logic using zippers to ensure state is always internally valid. Also added unit testing to provide a regression testing suite. This should fix issue #19.
This commit is contained in:
parent
da1fa73f8c
commit
365df0e680
2 changed files with 174 additions and 35 deletions
|
@ -1,10 +1,11 @@
|
|||
(ns cryogen-core.toc
|
||||
(:require [crouton.html :as html]
|
||||
(:require [clojure.zip :as z]
|
||||
[crouton.html :as html]
|
||||
[hiccup.core :as hiccup]))
|
||||
|
||||
(def _h [:h1 :h2 :h3 :h4 :h5 :h6])
|
||||
|
||||
(defn- compare_index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
|
||||
(defn- compare-index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
|
||||
|
||||
(defn- get-headings
|
||||
"Turn a body of html content into a vector of elements whose tags are
|
||||
|
@ -19,33 +20,77 @@
|
|||
headings)))
|
||||
[] content))
|
||||
|
||||
(defn make-links
|
||||
"Create a table of contents from the given headings. This function will look
|
||||
for either:
|
||||
(1) headings with a child anchor with a non-nil name attribute, e.g.
|
||||
<h1><a name=\"reference\">Reference Title</a></h1>
|
||||
or
|
||||
(2) headings with an id attribute, e.g. <h1 id=\"reference\">Reference Title</h1>
|
||||
In both cases above, the anchor reference becomes \"#reference\" and the
|
||||
anchor text is \"Reference Title\"."
|
||||
[headings li-tags]
|
||||
(let [[li-open li-close] li-tags]
|
||||
(loop [items headings acc nil _last nil]
|
||||
(defn- zip-toc-tree-to-insertion-point
|
||||
"Given a toc-tree zipper and a header level, navigate
|
||||
the zipper to the appropriate parent of the level for that header
|
||||
to be inserted and return the zipper."
|
||||
[toctree h-tag]
|
||||
(if-let [current-tag (-> toctree first :value :tag)]
|
||||
(let [direction (compare-index h-tag current-tag)]
|
||||
(cond (zero? direction) (z/up toctree) ; Tag belongs at current level
|
||||
(neg? direction) toctree ; Tag belongs below this level
|
||||
(pos? direction) (recur (z/up toctree) h-tag))) ; Keep looking up
|
||||
; This level is the root list, return it
|
||||
toctree))
|
||||
|
||||
(defn- insert-toc-tree-entry
|
||||
"Inserts a toc-tree (zipper) entry for the given entry at the appropriate place.
|
||||
Obeys the invariant that the toc-tree (zipper) is always moved to the inserted loc."
|
||||
[tree entry]
|
||||
(let [{htag :tag} entry
|
||||
tree (zip-toc-tree-to-insertion-point tree htag)]
|
||||
(-> tree (z/append-child {:children [] :value entry}) z/down z/rightmost)))
|
||||
|
||||
(defn- build-toc-tree
|
||||
"Given a sequence of header nodes, build a toc tree using zippers
|
||||
and return it."
|
||||
[headings]
|
||||
(loop [zp (z/zipper
|
||||
map?
|
||||
:children
|
||||
(fn [node children] (assoc node :children (apply vector children)))
|
||||
{:value :root :children []})
|
||||
items headings]
|
||||
(if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)]
|
||||
(let [anchor (or id name)]
|
||||
(if (nil? anchor)
|
||||
(recur (rest items) acc nil)
|
||||
(let [entry [:li [:a {:href (str "#" anchor)} (or title (first htext))]]
|
||||
jump (compare_index _last tag)]
|
||||
(cond (> jump 0) (recur (rest items) (str acc li-open (hiccup/html entry)) tag)
|
||||
(= jump 0) (recur (rest items) (str acc (hiccup/html entry)) tag)
|
||||
(< jump 0) (recur (rest items) (str acc (apply str (repeat (* -1 jump) li-close))
|
||||
(hiccup/html entry)) tag)))))
|
||||
(str acc li-close)))))
|
||||
(recur zp (rest items))
|
||||
(recur (insert-toc-tree-entry
|
||||
zp {:tag tag :anchor anchor :text (or title (first htext))})
|
||||
(rest items))))
|
||||
(z/root zp))))
|
||||
|
||||
(def _list-types {true ["<ol>" "</ol>"] :ol ["<ol>" "</ol>"] :ul ["<ul>" "</ul>"]})
|
||||
|
||||
(defn generate-toc [^String html & {list-type :list-type :or {list-type true}}]
|
||||
(defn- make-toc-entry
|
||||
"Given an anchor link and some text, construct a toc entry
|
||||
consisting of link to the anchor using the given text, wrapped
|
||||
in an <li> tag."
|
||||
[anchor text]
|
||||
(when (and anchor text)
|
||||
[:li [:a {:href (str "#" anchor)} text]]))
|
||||
|
||||
|
||||
(defn- build-toc
|
||||
"Given the root of a toc tree and either :ol or :ul,
|
||||
generate the table of contents and return it as a hiccup tree."
|
||||
[toc-tree list-open & {:keys [outer-list?] :or {outer-list? true}}]
|
||||
(let [{:keys [children], {:keys [anchor text]} :value} toc-tree
|
||||
li (make-toc-entry anchor text)
|
||||
first-list-open (if outer-list?
|
||||
(keyword (str (name list-open) ".content"))
|
||||
list-open)]
|
||||
; Create hiccup sequence of :ol/:ul tag and sequence of :li tags
|
||||
(if (seq children)
|
||||
(let [sublist [first-list-open (map build-toc children
|
||||
(repeat list-open)
|
||||
(repeat :outer-list?)
|
||||
(repeat false))]]
|
||||
(if-let [li li] ; The root element has nothing so ignore it
|
||||
(seq [li sublist]) ; Use seq to lazily concat the li with the sublists
|
||||
sublist))
|
||||
li))) ; Or just return the naked :li tag
|
||||
|
||||
(defn generate-toc
|
||||
"Reads an HTML string and parses it for headers, then returns a list of links
|
||||
to them.
|
||||
|
||||
|
@ -53,14 +98,13 @@
|
|||
:ol and true will result in an ordered list being generated for the table of
|
||||
contents, while :ul will result in an unordered list. The default is an
|
||||
ordered list."
|
||||
(let [li-tags (_list-types list-type)
|
||||
top-tag (-> li-tags first (subs 1 3))]
|
||||
(-> html
|
||||
(.getBytes "UTF-8")
|
||||
(java.io.ByteArrayInputStream.)
|
||||
(html/parse)
|
||||
:content
|
||||
(get-headings)
|
||||
(make-links li-tags)
|
||||
(clojure.string/replace-first
|
||||
(re-pattern top-tag) (str top-tag " class=\"contents\"")))))
|
||||
[^String html & {:keys [list-type] :or {list-type :ol}}]
|
||||
(-> html
|
||||
(.getBytes "UTF-8")
|
||||
(java.io.ByteArrayInputStream.)
|
||||
(html/parse)
|
||||
:content
|
||||
(get-headings)
|
||||
(build-toc-tree)
|
||||
(build-toc list-type)
|
||||
(hiccup/html)))
|
||||
|
|
95
test/cryogen_core/toc_test.clj
Normal file
95
test/cryogen_core/toc_test.clj
Normal file
|
@ -0,0 +1,95 @@
|
|||
(ns cryogen-core.toc-test
|
||||
(:require [clojure.test :refer :all]
|
||||
[clojure.string :refer [join]]
|
||||
[clojure.zip :as z]
|
||||
[crouton.html :as html]
|
||||
[hiccup.core :as hiccup]
|
||||
[cryogen-core.toc :refer :all]))
|
||||
|
||||
; Reimport private functions
|
||||
(def get-headings #'cryogen-core.toc/get-headings)
|
||||
(def make-toc-entry #'cryogen-core.toc/make-toc-entry)
|
||||
(def zip-toc-tree-to-insertion-point #'cryogen-core.toc/zip-toc-tree-to-insertion-point)
|
||||
(def insert-toc-tree-entry #'cryogen-core.toc/insert-toc-tree-entry)
|
||||
(def build-toc-tree #'cryogen-core.toc/build-toc-tree)
|
||||
(def build-toc #'cryogen-core.toc/build-toc)
|
||||
|
||||
(defn parse-to-headings
|
||||
[hiccup-seq]
|
||||
(-> hiccup-seq hiccup/html html/parse-string :content get-headings))
|
||||
|
||||
; Test that the get-headings function properly filters non-headers
|
||||
(deftest test-get-headings
|
||||
(let [noisy-headers [:div [:h1 "First H1"]
|
||||
[:p "Ignore..."]
|
||||
[:h2 "First H2"]]]
|
||||
(is (= (parse-to-headings noisy-headers)
|
||||
[{:tag :h1 :attrs nil :content ["First H1"]}
|
||||
{:tag :h2 :attrs nil :content ["First H2"]}]))))
|
||||
|
||||
; Test that the make-toc-entry ignores invalid input
|
||||
(deftest test-make-toc-entry
|
||||
(is (nil?
|
||||
(make-toc-entry nil "Text")))
|
||||
(is (nil?
|
||||
(make-toc-entry "anchor" nil)))
|
||||
(is (= [:li [:a {:href "#anchor"} "Text"]]
|
||||
(make-toc-entry "anchor" "Text"))))
|
||||
|
||||
; Test that the built table of contents always treats later
|
||||
; headers as being at the same level as earlier headers, even
|
||||
; if the later headers are strictly greater in value.
|
||||
; E.G.
|
||||
; * h2
|
||||
; * h3
|
||||
; * h1
|
||||
(deftest test-build-toc
|
||||
(let [simplest-header [:div [:h2 [:a {:name "test"}] "Test"]]
|
||||
no-headers [:div [:p "This is not a header"]]
|
||||
closing-header-larger-than-opening-1
|
||||
[:div [:h2 [:a {:name "starting_low"}]
|
||||
"Starting Low"]
|
||||
[:h1 [:a {:name "finishing_high"}]
|
||||
"Finishing High"]]
|
||||
closing-header-larger-than-opening-2
|
||||
[:div [:h2 [:a {:name "starting_low"}]
|
||||
"Starting Low"]
|
||||
[:h4 [:a {:name "jumping_in"}]
|
||||
"Jumping Right In"]
|
||||
[:h3 [:a {:name "pulling_back"}]
|
||||
"But then pull back"]
|
||||
[:h2 [:a {:name "to_the_top"}]
|
||||
"To the top"]]]
|
||||
(is (= [:ol.content (seq [[:li [:a {:href "#test"} "Test"]]])]
|
||||
(-> simplest-header parse-to-headings build-toc-tree
|
||||
(build-toc :ol))))
|
||||
(is (nil?
|
||||
(-> no-headers parse-to-headings build-toc-tree
|
||||
(build-toc :ol))))
|
||||
(is (= [:ol.content (seq [[:li [:a {:href "#starting_low"} "Starting Low"]]
|
||||
[:li [:a {:href "#finishing_high"} "Finishing High"]]])]
|
||||
(-> closing-header-larger-than-opening-1
|
||||
parse-to-headings
|
||||
build-toc-tree
|
||||
(build-toc :ol)))
|
||||
"No outer header should be less indented than the first header tag.")
|
||||
(is (= [:ul.content
|
||||
(seq [
|
||||
(seq [
|
||||
[:li [:a {:href "#starting_low"} "Starting Low"]]
|
||||
[:ul
|
||||
(seq [
|
||||
[:li [:a {:href "#jumping_in"} "Jumping Right In"]]
|
||||
[:li [:a {:href "#pulling_back"} "But then pull back"]]
|
||||
])
|
||||
] ])
|
||||
[:li [:a {:href "#to_the_top"} "To the top"]]
|
||||
])]
|
||||
(-> closing-header-larger-than-opening-2
|
||||
parse-to-headings
|
||||
build-toc-tree
|
||||
(build-toc :ul)))
|
||||
(join "" ["Inner headers can be more indented, "
|
||||
"but outer headers cannot be less indented "
|
||||
"than the original header."]))
|
||||
))
|
Loading…
Reference in a new issue