diff --git a/src/cryogen_core/toc.clj b/src/cryogen_core/toc.clj
index 9c20c22..e7bc348 100644
--- a/src/cryogen_core/toc.clj
+++ b/src/cryogen_core/toc.clj
@@ -1,10 +1,11 @@
(ns cryogen-core.toc
- (:require [crouton.html :as html]
+ (:require [clojure.zip :as z]
+ [crouton.html :as html]
[hiccup.core :as hiccup]))
(def _h [:h1 :h2 :h3 :h4 :h5 :h6])
-(defn- compare_index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
+(defn- compare-index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
(defn- get-headings
"Turn a body of html content into a vector of elements whose tags are
@@ -19,33 +20,77 @@
headings)))
[] content))
-(defn make-links
- "Create a table of contents from the given headings. This function will look
- for either:
- (1) headings with a child anchor with a non-nil name attribute, e.g.
-
- or
- (2) headings with an id attribute, e.g. Reference Title
- In both cases above, the anchor reference becomes \"#reference\" and the
- anchor text is \"Reference Title\"."
- [headings li-tags]
- (let [[li-open li-close] li-tags]
- (loop [items headings acc nil _last nil]
+(defn- zip-toc-tree-to-insertion-point
+ "Given a toc-tree zipper and a header level, navigate
+ the zipper to the appropriate parent of the level for that header
+ to be inserted and return the zipper."
+ [toctree h-tag]
+ (if-let [current-tag (-> toctree first :value :tag)]
+ (let [direction (compare-index h-tag current-tag)]
+ (cond (zero? direction) (z/up toctree) ; Tag belongs at current level
+ (neg? direction) toctree ; Tag belongs below this level
+ (pos? direction) (recur (z/up toctree) h-tag))) ; Keep looking up
+ ; This level is the root list, return it
+ toctree))
+
+(defn- insert-toc-tree-entry
+ "Inserts a toc-tree (zipper) entry for the given entry at the appropriate place.
+ Obeys the invariant that the toc-tree (zipper) is always moved to the inserted loc."
+ [tree entry]
+ (let [{htag :tag} entry
+ tree (zip-toc-tree-to-insertion-point tree htag)]
+ (-> tree (z/append-child {:children [] :value entry}) z/down z/rightmost)))
+
+(defn- build-toc-tree
+ "Given a sequence of header nodes, build a toc tree using zippers
+ and return it."
+ [headings]
+ (loop [zp (z/zipper
+ map?
+ :children
+ (fn [node children] (assoc node :children (apply vector children)))
+ {:value :root :children []})
+ items headings]
(if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)]
(let [anchor (or id name)]
(if (nil? anchor)
- (recur (rest items) acc nil)
- (let [entry [:li [:a {:href (str "#" anchor)} (or title (first htext))]]
- jump (compare_index _last tag)]
- (cond (> jump 0) (recur (rest items) (str acc li-open (hiccup/html entry)) tag)
- (= jump 0) (recur (rest items) (str acc (hiccup/html entry)) tag)
- (< jump 0) (recur (rest items) (str acc (apply str (repeat (* -1 jump) li-close))
- (hiccup/html entry)) tag)))))
- (str acc li-close)))))
+ (recur zp (rest items))
+ (recur (insert-toc-tree-entry
+ zp {:tag tag :anchor anchor :text (or title (first htext))})
+ (rest items))))
+ (z/root zp))))
-(def _list-types {true ["" "
"] :ol ["" "
"] :ul [""]})
-(defn generate-toc [^String html & {list-type :list-type :or {list-type true}}]
+(defn- make-toc-entry
+ "Given an anchor link and some text, construct a toc entry
+ consisting of link to the anchor using the given text, wrapped
+ in an tag."
+ [anchor text]
+ (when (and anchor text)
+ [:li [:a {:href (str "#" anchor)} text]]))
+
+
+(defn- build-toc
+ "Given the root of a toc tree and either :ol or :ul,
+ generate the table of contents and return it as a hiccup tree."
+ [toc-tree list-open & {:keys [outer-list?] :or {outer-list? true}}]
+ (let [{:keys [children], {:keys [anchor text]} :value} toc-tree
+ li (make-toc-entry anchor text)
+ first-list-open (if outer-list?
+ (keyword (str (name list-open) ".content"))
+ list-open)]
+ ; Create hiccup sequence of :ol/:ul tag and sequence of :li tags
+ (if (seq children)
+ (let [sublist [first-list-open (map build-toc children
+ (repeat list-open)
+ (repeat :outer-list?)
+ (repeat false))]]
+ (if-let [li li] ; The root element has nothing so ignore it
+ (seq [li sublist]) ; Use seq to lazily concat the li with the sublists
+ sublist))
+ li))) ; Or just return the naked :li tag
+
+(defn generate-toc
"Reads an HTML string and parses it for headers, then returns a list of links
to them.
@@ -53,14 +98,13 @@
:ol and true will result in an ordered list being generated for the table of
contents, while :ul will result in an unordered list. The default is an
ordered list."
- (let [li-tags (_list-types list-type)
- top-tag (-> li-tags first (subs 1 3))]
- (-> html
- (.getBytes "UTF-8")
- (java.io.ByteArrayInputStream.)
- (html/parse)
- :content
- (get-headings)
- (make-links li-tags)
- (clojure.string/replace-first
- (re-pattern top-tag) (str top-tag " class=\"contents\"")))))
+ [^String html & {:keys [list-type] :or {list-type :ol}}]
+ (-> html
+ (.getBytes "UTF-8")
+ (java.io.ByteArrayInputStream.)
+ (html/parse)
+ :content
+ (get-headings)
+ (build-toc-tree)
+ (build-toc list-type)
+ (hiccup/html)))
diff --git a/test/cryogen_core/toc_test.clj b/test/cryogen_core/toc_test.clj
new file mode 100644
index 0000000..4554205
--- /dev/null
+++ b/test/cryogen_core/toc_test.clj
@@ -0,0 +1,95 @@
+(ns cryogen-core.toc-test
+ (:require [clojure.test :refer :all]
+ [clojure.string :refer [join]]
+ [clojure.zip :as z]
+ [crouton.html :as html]
+ [hiccup.core :as hiccup]
+ [cryogen-core.toc :refer :all]))
+
+; Reimport private functions
+(def get-headings #'cryogen-core.toc/get-headings)
+(def make-toc-entry #'cryogen-core.toc/make-toc-entry)
+(def zip-toc-tree-to-insertion-point #'cryogen-core.toc/zip-toc-tree-to-insertion-point)
+(def insert-toc-tree-entry #'cryogen-core.toc/insert-toc-tree-entry)
+(def build-toc-tree #'cryogen-core.toc/build-toc-tree)
+(def build-toc #'cryogen-core.toc/build-toc)
+
+(defn parse-to-headings
+ [hiccup-seq]
+ (-> hiccup-seq hiccup/html html/parse-string :content get-headings))
+
+; Test that the get-headings function properly filters non-headers
+(deftest test-get-headings
+ (let [noisy-headers [:div [:h1 "First H1"]
+ [:p "Ignore..."]
+ [:h2 "First H2"]]]
+ (is (= (parse-to-headings noisy-headers)
+ [{:tag :h1 :attrs nil :content ["First H1"]}
+ {:tag :h2 :attrs nil :content ["First H2"]}]))))
+
+; Test that the make-toc-entry ignores invalid input
+(deftest test-make-toc-entry
+ (is (nil?
+ (make-toc-entry nil "Text")))
+ (is (nil?
+ (make-toc-entry "anchor" nil)))
+ (is (= [:li [:a {:href "#anchor"} "Text"]]
+ (make-toc-entry "anchor" "Text"))))
+
+; Test that the built table of contents always treats later
+; headers as being at the same level as earlier headers, even
+; if the later headers are strictly greater in value.
+; E.G.
+; * h2
+; * h3
+; * h1
+(deftest test-build-toc
+ (let [simplest-header [:div [:h2 [:a {:name "test"}] "Test"]]
+ no-headers [:div [:p "This is not a header"]]
+ closing-header-larger-than-opening-1
+ [:div [:h2 [:a {:name "starting_low"}]
+ "Starting Low"]
+ [:h1 [:a {:name "finishing_high"}]
+ "Finishing High"]]
+ closing-header-larger-than-opening-2
+ [:div [:h2 [:a {:name "starting_low"}]
+ "Starting Low"]
+ [:h4 [:a {:name "jumping_in"}]
+ "Jumping Right In"]
+ [:h3 [:a {:name "pulling_back"}]
+ "But then pull back"]
+ [:h2 [:a {:name "to_the_top"}]
+ "To the top"]]]
+ (is (= [:ol.content (seq [[:li [:a {:href "#test"} "Test"]]])]
+ (-> simplest-header parse-to-headings build-toc-tree
+ (build-toc :ol))))
+ (is (nil?
+ (-> no-headers parse-to-headings build-toc-tree
+ (build-toc :ol))))
+ (is (= [:ol.content (seq [[:li [:a {:href "#starting_low"} "Starting Low"]]
+ [:li [:a {:href "#finishing_high"} "Finishing High"]]])]
+ (-> closing-header-larger-than-opening-1
+ parse-to-headings
+ build-toc-tree
+ (build-toc :ol)))
+ "No outer header should be less indented than the first header tag.")
+ (is (= [:ul.content
+ (seq [
+ (seq [
+ [:li [:a {:href "#starting_low"} "Starting Low"]]
+ [:ul
+ (seq [
+ [:li [:a {:href "#jumping_in"} "Jumping Right In"]]
+ [:li [:a {:href "#pulling_back"} "But then pull back"]]
+ ])
+ ] ])
+ [:li [:a {:href "#to_the_top"} "To the top"]]
+ ])]
+ (-> closing-header-larger-than-opening-2
+ parse-to-headings
+ build-toc-tree
+ (build-toc :ul)))
+ (join "" ["Inner headers can be more indented, "
+ "but outer headers cannot be less indented "
+ "than the original header."]))
+ ))