Reimplemented TOC logic using zippers to ensure state is always internally valid. Also added unit testing to provide a regression testing suite. This should fix issue #19.

This commit is contained in:
Jeff Rabinowitz 2015-11-01 01:59:54 -04:00
parent da1fa73f8c
commit 365df0e680
2 changed files with 174 additions and 35 deletions

View file

@ -1,10 +1,11 @@
(ns cryogen-core.toc
(:require [crouton.html :as html]
(:require [clojure.zip :as z]
[crouton.html :as html]
[hiccup.core :as hiccup]))
(def _h [:h1 :h2 :h3 :h4 :h5 :h6])
(defn- compare_index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
(defn- compare-index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
(defn- get-headings
"Turn a body of html content into a vector of elements whose tags are
@ -19,33 +20,77 @@
headings)))
[] content))
(defn make-links
"Create a table of contents from the given headings. This function will look
for either:
(1) headings with a child anchor with a non-nil name attribute, e.g.
<h1><a name=\"reference\">Reference Title</a></h1>
or
(2) headings with an id attribute, e.g. <h1 id=\"reference\">Reference Title</h1>
In both cases above, the anchor reference becomes \"#reference\" and the
anchor text is \"Reference Title\"."
[headings li-tags]
(let [[li-open li-close] li-tags]
(loop [items headings acc nil _last nil]
(defn- zip-toc-tree-to-insertion-point
"Given a toc-tree zipper and a header level, navigate
the zipper to the appropriate parent of the level for that header
to be inserted and return the zipper."
[toctree h-tag]
(if-let [current-tag (-> toctree first :value :tag)]
(let [direction (compare-index h-tag current-tag)]
(cond (zero? direction) (z/up toctree) ; Tag belongs at current level
(neg? direction) toctree ; Tag belongs below this level
(pos? direction) (recur (z/up toctree) h-tag))) ; Keep looking up
; This level is the root list, return it
toctree))
(defn- insert-toc-tree-entry
"Inserts a toc-tree (zipper) entry for the given entry at the appropriate place.
Obeys the invariant that the toc-tree (zipper) is always moved to the inserted loc."
[tree entry]
(let [{htag :tag} entry
tree (zip-toc-tree-to-insertion-point tree htag)]
(-> tree (z/append-child {:children [] :value entry}) z/down z/rightmost)))
(defn- build-toc-tree
"Given a sequence of header nodes, build a toc tree using zippers
and return it."
[headings]
(loop [zp (z/zipper
map?
:children
(fn [node children] (assoc node :children (apply vector children)))
{:value :root :children []})
items headings]
(if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)]
(let [anchor (or id name)]
(if (nil? anchor)
(recur (rest items) acc nil)
(let [entry [:li [:a {:href (str "#" anchor)} (or title (first htext))]]
jump (compare_index _last tag)]
(cond (> jump 0) (recur (rest items) (str acc li-open (hiccup/html entry)) tag)
(= jump 0) (recur (rest items) (str acc (hiccup/html entry)) tag)
(< jump 0) (recur (rest items) (str acc (apply str (repeat (* -1 jump) li-close))
(hiccup/html entry)) tag)))))
(str acc li-close)))))
(recur zp (rest items))
(recur (insert-toc-tree-entry
zp {:tag tag :anchor anchor :text (or title (first htext))})
(rest items))))
(z/root zp))))
(def _list-types {true ["<ol>" "</ol>"] :ol ["<ol>" "</ol>"] :ul ["<ul>" "</ul>"]})
(defn generate-toc [^String html & {list-type :list-type :or {list-type true}}]
(defn- make-toc-entry
"Given an anchor link and some text, construct a toc entry
consisting of link to the anchor using the given text, wrapped
in an <li> tag."
[anchor text]
(when (and anchor text)
[:li [:a {:href (str "#" anchor)} text]]))
(defn- build-toc
"Given the root of a toc tree and either :ol or :ul,
generate the table of contents and return it as a hiccup tree."
[toc-tree list-open & {:keys [outer-list?] :or {outer-list? true}}]
(let [{:keys [children], {:keys [anchor text]} :value} toc-tree
li (make-toc-entry anchor text)
first-list-open (if outer-list?
(keyword (str (name list-open) ".content"))
list-open)]
; Create hiccup sequence of :ol/:ul tag and sequence of :li tags
(if (seq children)
(let [sublist [first-list-open (map build-toc children
(repeat list-open)
(repeat :outer-list?)
(repeat false))]]
(if-let [li li] ; The root element has nothing so ignore it
(seq [li sublist]) ; Use seq to lazily concat the li with the sublists
sublist))
li))) ; Or just return the naked :li tag
(defn generate-toc
"Reads an HTML string and parses it for headers, then returns a list of links
to them.
@ -53,14 +98,13 @@
:ol and true will result in an ordered list being generated for the table of
contents, while :ul will result in an unordered list. The default is an
ordered list."
(let [li-tags (_list-types list-type)
top-tag (-> li-tags first (subs 1 3))]
[^String html & {:keys [list-type] :or {list-type :ol}}]
(-> html
(.getBytes "UTF-8")
(java.io.ByteArrayInputStream.)
(html/parse)
:content
(get-headings)
(make-links li-tags)
(clojure.string/replace-first
(re-pattern top-tag) (str top-tag " class=\"contents\"")))))
(build-toc-tree)
(build-toc list-type)
(hiccup/html)))

View file

@ -0,0 +1,95 @@
(ns cryogen-core.toc-test
(:require [clojure.test :refer :all]
[clojure.string :refer [join]]
[clojure.zip :as z]
[crouton.html :as html]
[hiccup.core :as hiccup]
[cryogen-core.toc :refer :all]))
; Reimport private functions
(def get-headings #'cryogen-core.toc/get-headings)
(def make-toc-entry #'cryogen-core.toc/make-toc-entry)
(def zip-toc-tree-to-insertion-point #'cryogen-core.toc/zip-toc-tree-to-insertion-point)
(def insert-toc-tree-entry #'cryogen-core.toc/insert-toc-tree-entry)
(def build-toc-tree #'cryogen-core.toc/build-toc-tree)
(def build-toc #'cryogen-core.toc/build-toc)
(defn parse-to-headings
[hiccup-seq]
(-> hiccup-seq hiccup/html html/parse-string :content get-headings))
; Test that the get-headings function properly filters non-headers
(deftest test-get-headings
(let [noisy-headers [:div [:h1 "First H1"]
[:p "Ignore..."]
[:h2 "First H2"]]]
(is (= (parse-to-headings noisy-headers)
[{:tag :h1 :attrs nil :content ["First H1"]}
{:tag :h2 :attrs nil :content ["First H2"]}]))))
; Test that the make-toc-entry ignores invalid input
(deftest test-make-toc-entry
(is (nil?
(make-toc-entry nil "Text")))
(is (nil?
(make-toc-entry "anchor" nil)))
(is (= [:li [:a {:href "#anchor"} "Text"]]
(make-toc-entry "anchor" "Text"))))
; Test that the built table of contents always treats later
; headers as being at the same level as earlier headers, even
; if the later headers are strictly greater in value.
; E.G.
; * h2
; * h3
; * h1
(deftest test-build-toc
(let [simplest-header [:div [:h2 [:a {:name "test"}] "Test"]]
no-headers [:div [:p "This is not a header"]]
closing-header-larger-than-opening-1
[:div [:h2 [:a {:name "starting_low"}]
"Starting Low"]
[:h1 [:a {:name "finishing_high"}]
"Finishing High"]]
closing-header-larger-than-opening-2
[:div [:h2 [:a {:name "starting_low"}]
"Starting Low"]
[:h4 [:a {:name "jumping_in"}]
"Jumping Right In"]
[:h3 [:a {:name "pulling_back"}]
"But then pull back"]
[:h2 [:a {:name "to_the_top"}]
"To the top"]]]
(is (= [:ol.content (seq [[:li [:a {:href "#test"} "Test"]]])]
(-> simplest-header parse-to-headings build-toc-tree
(build-toc :ol))))
(is (nil?
(-> no-headers parse-to-headings build-toc-tree
(build-toc :ol))))
(is (= [:ol.content (seq [[:li [:a {:href "#starting_low"} "Starting Low"]]
[:li [:a {:href "#finishing_high"} "Finishing High"]]])]
(-> closing-header-larger-than-opening-1
parse-to-headings
build-toc-tree
(build-toc :ol)))
"No outer header should be less indented than the first header tag.")
(is (= [:ul.content
(seq [
(seq [
[:li [:a {:href "#starting_low"} "Starting Low"]]
[:ul
(seq [
[:li [:a {:href "#jumping_in"} "Jumping Right In"]]
[:li [:a {:href "#pulling_back"} "But then pull back"]]
])
] ])
[:li [:a {:href "#to_the_top"} "To the top"]]
])]
(-> closing-header-larger-than-opening-2
parse-to-headings
build-toc-tree
(build-toc :ul)))
(join "" ["Inner headers can be more indented, "
"but outer headers cannot be less indented "
"than the original header."]))
))