Merge pull request #51 from Artiavis/zipper-toc
Use Zippers to Build TOC Data Structure and add unit tests Fixes cryogen-project/cryogen#62
This commit is contained in:
commit
6d50af6a7a
2 changed files with 174 additions and 35 deletions
|
@ -1,10 +1,11 @@
|
||||||
(ns cryogen-core.toc
|
(ns cryogen-core.toc
|
||||||
(:require [crouton.html :as html]
|
(:require [clojure.zip :as z]
|
||||||
|
[crouton.html :as html]
|
||||||
[hiccup.core :as hiccup]))
|
[hiccup.core :as hiccup]))
|
||||||
|
|
||||||
(def _h [:h1 :h2 :h3 :h4 :h5 :h6])
|
(def _h [:h1 :h2 :h3 :h4 :h5 :h6])
|
||||||
|
|
||||||
(defn- compare_index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
|
(defn- compare-index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
|
||||||
|
|
||||||
(defn- get-headings
|
(defn- get-headings
|
||||||
"Turn a body of html content into a vector of elements whose tags are
|
"Turn a body of html content into a vector of elements whose tags are
|
||||||
|
@ -19,33 +20,77 @@
|
||||||
headings)))
|
headings)))
|
||||||
[] content))
|
[] content))
|
||||||
|
|
||||||
(defn make-links
|
(defn- zip-toc-tree-to-insertion-point
|
||||||
"Create a table of contents from the given headings. This function will look
|
"Given a toc-tree zipper and a header level, navigate
|
||||||
for either:
|
the zipper to the appropriate parent of the level for that header
|
||||||
(1) headings with a child anchor with a non-nil name attribute, e.g.
|
to be inserted and return the zipper."
|
||||||
<h1><a name=\"reference\">Reference Title</a></h1>
|
[toctree h-tag]
|
||||||
or
|
(if-let [current-tag (-> toctree first :value :tag)]
|
||||||
(2) headings with an id attribute, e.g. <h1 id=\"reference\">Reference Title</h1>
|
(let [direction (compare-index h-tag current-tag)]
|
||||||
In both cases above, the anchor reference becomes \"#reference\" and the
|
(cond (zero? direction) (z/up toctree) ; Tag belongs at current level
|
||||||
anchor text is \"Reference Title\"."
|
(neg? direction) toctree ; Tag belongs below this level
|
||||||
[headings li-tags]
|
(pos? direction) (recur (z/up toctree) h-tag))) ; Keep looking up
|
||||||
(let [[li-open li-close] li-tags]
|
; This level is the root list, return it
|
||||||
(loop [items headings acc nil _last nil]
|
toctree))
|
||||||
|
|
||||||
|
(defn- insert-toc-tree-entry
|
||||||
|
"Inserts a toc-tree (zipper) entry for the given entry at the appropriate place.
|
||||||
|
Obeys the invariant that the toc-tree (zipper) is always moved to the inserted loc."
|
||||||
|
[tree entry]
|
||||||
|
(let [{htag :tag} entry
|
||||||
|
tree (zip-toc-tree-to-insertion-point tree htag)]
|
||||||
|
(-> tree (z/append-child {:children [] :value entry}) z/down z/rightmost)))
|
||||||
|
|
||||||
|
(defn- build-toc-tree
|
||||||
|
"Given a sequence of header nodes, build a toc tree using zippers
|
||||||
|
and return it."
|
||||||
|
[headings]
|
||||||
|
(loop [zp (z/zipper
|
||||||
|
map?
|
||||||
|
:children
|
||||||
|
(fn [node children] (assoc node :children (apply vector children)))
|
||||||
|
{:value :root :children []})
|
||||||
|
items headings]
|
||||||
(if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)]
|
(if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)]
|
||||||
(let [anchor (or id name)]
|
(let [anchor (or id name)]
|
||||||
(if (nil? anchor)
|
(if (nil? anchor)
|
||||||
(recur (rest items) acc nil)
|
(recur zp (rest items))
|
||||||
(let [entry [:li [:a {:href (str "#" anchor)} (or title (first htext))]]
|
(recur (insert-toc-tree-entry
|
||||||
jump (compare_index _last tag)]
|
zp {:tag tag :anchor anchor :text (or title (first htext))})
|
||||||
(cond (> jump 0) (recur (rest items) (str acc li-open (hiccup/html entry)) tag)
|
(rest items))))
|
||||||
(= jump 0) (recur (rest items) (str acc (hiccup/html entry)) tag)
|
(z/root zp))))
|
||||||
(< jump 0) (recur (rest items) (str acc (apply str (repeat (* -1 jump) li-close))
|
|
||||||
(hiccup/html entry)) tag)))))
|
|
||||||
(str acc li-close)))))
|
|
||||||
|
|
||||||
(def _list-types {true ["<ol>" "</ol>"] :ol ["<ol>" "</ol>"] :ul ["<ul>" "</ul>"]})
|
|
||||||
|
|
||||||
(defn generate-toc [^String html & {list-type :list-type :or {list-type true}}]
|
(defn- make-toc-entry
|
||||||
|
"Given an anchor link and some text, construct a toc entry
|
||||||
|
consisting of link to the anchor using the given text, wrapped
|
||||||
|
in an <li> tag."
|
||||||
|
[anchor text]
|
||||||
|
(when (and anchor text)
|
||||||
|
[:li [:a {:href (str "#" anchor)} text]]))
|
||||||
|
|
||||||
|
|
||||||
|
(defn- build-toc
|
||||||
|
"Given the root of a toc tree and either :ol or :ul,
|
||||||
|
generate the table of contents and return it as a hiccup tree."
|
||||||
|
[toc-tree list-open & {:keys [outer-list?] :or {outer-list? true}}]
|
||||||
|
(let [{:keys [children], {:keys [anchor text]} :value} toc-tree
|
||||||
|
li (make-toc-entry anchor text)
|
||||||
|
first-list-open (if outer-list?
|
||||||
|
(keyword (str (name list-open) ".content"))
|
||||||
|
list-open)]
|
||||||
|
; Create hiccup sequence of :ol/:ul tag and sequence of :li tags
|
||||||
|
(if (seq children)
|
||||||
|
(let [sublist [first-list-open (map build-toc children
|
||||||
|
(repeat list-open)
|
||||||
|
(repeat :outer-list?)
|
||||||
|
(repeat false))]]
|
||||||
|
(if-let [li li] ; The root element has nothing so ignore it
|
||||||
|
(seq [li sublist]) ; Use seq to lazily concat the li with the sublists
|
||||||
|
sublist))
|
||||||
|
li))) ; Or just return the naked :li tag
|
||||||
|
|
||||||
|
(defn generate-toc
|
||||||
"Reads an HTML string and parses it for headers, then returns a list of links
|
"Reads an HTML string and parses it for headers, then returns a list of links
|
||||||
to them.
|
to them.
|
||||||
|
|
||||||
|
@ -53,14 +98,13 @@
|
||||||
:ol and true will result in an ordered list being generated for the table of
|
:ol and true will result in an ordered list being generated for the table of
|
||||||
contents, while :ul will result in an unordered list. The default is an
|
contents, while :ul will result in an unordered list. The default is an
|
||||||
ordered list."
|
ordered list."
|
||||||
(let [li-tags (_list-types list-type)
|
[^String html & {:keys [list-type] :or {list-type :ol}}]
|
||||||
top-tag (-> li-tags first (subs 1 3))]
|
|
||||||
(-> html
|
(-> html
|
||||||
(.getBytes "UTF-8")
|
(.getBytes "UTF-8")
|
||||||
(java.io.ByteArrayInputStream.)
|
(java.io.ByteArrayInputStream.)
|
||||||
(html/parse)
|
(html/parse)
|
||||||
:content
|
:content
|
||||||
(get-headings)
|
(get-headings)
|
||||||
(make-links li-tags)
|
(build-toc-tree)
|
||||||
(clojure.string/replace-first
|
(build-toc list-type)
|
||||||
(re-pattern top-tag) (str top-tag " class=\"contents\"")))))
|
(hiccup/html)))
|
||||||
|
|
95
test/cryogen_core/toc_test.clj
Normal file
95
test/cryogen_core/toc_test.clj
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
(ns cryogen-core.toc-test
|
||||||
|
(:require [clojure.test :refer :all]
|
||||||
|
[clojure.string :refer [join]]
|
||||||
|
[clojure.zip :as z]
|
||||||
|
[crouton.html :as html]
|
||||||
|
[hiccup.core :as hiccup]
|
||||||
|
[cryogen-core.toc :refer :all]))
|
||||||
|
|
||||||
|
; Reimport private functions
|
||||||
|
(def get-headings #'cryogen-core.toc/get-headings)
|
||||||
|
(def make-toc-entry #'cryogen-core.toc/make-toc-entry)
|
||||||
|
(def zip-toc-tree-to-insertion-point #'cryogen-core.toc/zip-toc-tree-to-insertion-point)
|
||||||
|
(def insert-toc-tree-entry #'cryogen-core.toc/insert-toc-tree-entry)
|
||||||
|
(def build-toc-tree #'cryogen-core.toc/build-toc-tree)
|
||||||
|
(def build-toc #'cryogen-core.toc/build-toc)
|
||||||
|
|
||||||
|
(defn parse-to-headings
|
||||||
|
[hiccup-seq]
|
||||||
|
(-> hiccup-seq hiccup/html html/parse-string :content get-headings))
|
||||||
|
|
||||||
|
; Test that the get-headings function properly filters non-headers
|
||||||
|
(deftest test-get-headings
|
||||||
|
(let [noisy-headers [:div [:h1 "First H1"]
|
||||||
|
[:p "Ignore..."]
|
||||||
|
[:h2 "First H2"]]]
|
||||||
|
(is (= (parse-to-headings noisy-headers)
|
||||||
|
[{:tag :h1 :attrs nil :content ["First H1"]}
|
||||||
|
{:tag :h2 :attrs nil :content ["First H2"]}]))))
|
||||||
|
|
||||||
|
; Test that the make-toc-entry ignores invalid input
|
||||||
|
(deftest test-make-toc-entry
|
||||||
|
(is (nil?
|
||||||
|
(make-toc-entry nil "Text")))
|
||||||
|
(is (nil?
|
||||||
|
(make-toc-entry "anchor" nil)))
|
||||||
|
(is (= [:li [:a {:href "#anchor"} "Text"]]
|
||||||
|
(make-toc-entry "anchor" "Text"))))
|
||||||
|
|
||||||
|
; Test that the built table of contents always treats later
|
||||||
|
; headers as being at the same level as earlier headers, even
|
||||||
|
; if the later headers are strictly greater in value.
|
||||||
|
; E.G.
|
||||||
|
; * h2
|
||||||
|
; * h3
|
||||||
|
; * h1
|
||||||
|
(deftest test-build-toc
|
||||||
|
(let [simplest-header [:div [:h2 [:a {:name "test"}] "Test"]]
|
||||||
|
no-headers [:div [:p "This is not a header"]]
|
||||||
|
closing-header-larger-than-opening-1
|
||||||
|
[:div [:h2 [:a {:name "starting_low"}]
|
||||||
|
"Starting Low"]
|
||||||
|
[:h1 [:a {:name "finishing_high"}]
|
||||||
|
"Finishing High"]]
|
||||||
|
closing-header-larger-than-opening-2
|
||||||
|
[:div [:h2 [:a {:name "starting_low"}]
|
||||||
|
"Starting Low"]
|
||||||
|
[:h4 [:a {:name "jumping_in"}]
|
||||||
|
"Jumping Right In"]
|
||||||
|
[:h3 [:a {:name "pulling_back"}]
|
||||||
|
"But then pull back"]
|
||||||
|
[:h2 [:a {:name "to_the_top"}]
|
||||||
|
"To the top"]]]
|
||||||
|
(is (= [:ol.content (seq [[:li [:a {:href "#test"} "Test"]]])]
|
||||||
|
(-> simplest-header parse-to-headings build-toc-tree
|
||||||
|
(build-toc :ol))))
|
||||||
|
(is (nil?
|
||||||
|
(-> no-headers parse-to-headings build-toc-tree
|
||||||
|
(build-toc :ol))))
|
||||||
|
(is (= [:ol.content (seq [[:li [:a {:href "#starting_low"} "Starting Low"]]
|
||||||
|
[:li [:a {:href "#finishing_high"} "Finishing High"]]])]
|
||||||
|
(-> closing-header-larger-than-opening-1
|
||||||
|
parse-to-headings
|
||||||
|
build-toc-tree
|
||||||
|
(build-toc :ol)))
|
||||||
|
"No outer header should be less indented than the first header tag.")
|
||||||
|
(is (= [:ul.content
|
||||||
|
(seq [
|
||||||
|
(seq [
|
||||||
|
[:li [:a {:href "#starting_low"} "Starting Low"]]
|
||||||
|
[:ul
|
||||||
|
(seq [
|
||||||
|
[:li [:a {:href "#jumping_in"} "Jumping Right In"]]
|
||||||
|
[:li [:a {:href "#pulling_back"} "But then pull back"]]
|
||||||
|
])
|
||||||
|
] ])
|
||||||
|
[:li [:a {:href "#to_the_top"} "To the top"]]
|
||||||
|
])]
|
||||||
|
(-> closing-header-larger-than-opening-2
|
||||||
|
parse-to-headings
|
||||||
|
build-toc-tree
|
||||||
|
(build-toc :ul)))
|
||||||
|
(join "" ["Inner headers can be more indented, "
|
||||||
|
"but outer headers cannot be less indented "
|
||||||
|
"than the original header."]))
|
||||||
|
))
|
Loading…
Reference in a new issue