Merge pull request #51 from Artiavis/zipper-toc

Use Zippers to Build TOC Data Structure and add unit tests
Fixes cryogen-project/cryogen#62
This commit is contained in:
Carmen La 2015-11-01 19:52:07 -05:00
commit 6d50af6a7a
2 changed files with 174 additions and 35 deletions

View file

@ -1,10 +1,11 @@
(ns cryogen-core.toc (ns cryogen-core.toc
(:require [crouton.html :as html] (:require [clojure.zip :as z]
[crouton.html :as html]
[hiccup.core :as hiccup])) [hiccup.core :as hiccup]))
(def _h [:h1 :h2 :h3 :h4 :h5 :h6]) (def _h [:h1 :h2 :h3 :h4 :h5 :h6])
(defn- compare_index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1))) (defn- compare-index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1)))
(defn- get-headings (defn- get-headings
"Turn a body of html content into a vector of elements whose tags are "Turn a body of html content into a vector of elements whose tags are
@ -19,33 +20,77 @@
headings))) headings)))
[] content)) [] content))
(defn make-links (defn- zip-toc-tree-to-insertion-point
"Create a table of contents from the given headings. This function will look "Given a toc-tree zipper and a header level, navigate
for either: the zipper to the appropriate parent of the level for that header
(1) headings with a child anchor with a non-nil name attribute, e.g. to be inserted and return the zipper."
<h1><a name=\"reference\">Reference Title</a></h1> [toctree h-tag]
or (if-let [current-tag (-> toctree first :value :tag)]
(2) headings with an id attribute, e.g. <h1 id=\"reference\">Reference Title</h1> (let [direction (compare-index h-tag current-tag)]
In both cases above, the anchor reference becomes \"#reference\" and the (cond (zero? direction) (z/up toctree) ; Tag belongs at current level
anchor text is \"Reference Title\"." (neg? direction) toctree ; Tag belongs below this level
[headings li-tags] (pos? direction) (recur (z/up toctree) h-tag))) ; Keep looking up
(let [[li-open li-close] li-tags] ; This level is the root list, return it
(loop [items headings acc nil _last nil] toctree))
(defn- insert-toc-tree-entry
"Inserts a toc-tree (zipper) entry for the given entry at the appropriate place.
Obeys the invariant that the toc-tree (zipper) is always moved to the inserted loc."
[tree entry]
(let [{htag :tag} entry
tree (zip-toc-tree-to-insertion-point tree htag)]
(-> tree (z/append-child {:children [] :value entry}) z/down z/rightmost)))
(defn- build-toc-tree
"Given a sequence of header nodes, build a toc tree using zippers
and return it."
[headings]
(loop [zp (z/zipper
map?
:children
(fn [node children] (assoc node :children (apply vector children)))
{:value :root :children []})
items headings]
(if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)] (if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)]
(let [anchor (or id name)] (let [anchor (or id name)]
(if (nil? anchor) (if (nil? anchor)
(recur (rest items) acc nil) (recur zp (rest items))
(let [entry [:li [:a {:href (str "#" anchor)} (or title (first htext))]] (recur (insert-toc-tree-entry
jump (compare_index _last tag)] zp {:tag tag :anchor anchor :text (or title (first htext))})
(cond (> jump 0) (recur (rest items) (str acc li-open (hiccup/html entry)) tag) (rest items))))
(= jump 0) (recur (rest items) (str acc (hiccup/html entry)) tag) (z/root zp))))
(< jump 0) (recur (rest items) (str acc (apply str (repeat (* -1 jump) li-close))
(hiccup/html entry)) tag)))))
(str acc li-close)))))
(def _list-types {true ["<ol>" "</ol>"] :ol ["<ol>" "</ol>"] :ul ["<ul>" "</ul>"]})
(defn generate-toc [^String html & {list-type :list-type :or {list-type true}}] (defn- make-toc-entry
"Given an anchor link and some text, construct a toc entry
consisting of link to the anchor using the given text, wrapped
in an <li> tag."
[anchor text]
(when (and anchor text)
[:li [:a {:href (str "#" anchor)} text]]))
(defn- build-toc
"Given the root of a toc tree and either :ol or :ul,
generate the table of contents and return it as a hiccup tree."
[toc-tree list-open & {:keys [outer-list?] :or {outer-list? true}}]
(let [{:keys [children], {:keys [anchor text]} :value} toc-tree
li (make-toc-entry anchor text)
first-list-open (if outer-list?
(keyword (str (name list-open) ".content"))
list-open)]
; Create hiccup sequence of :ol/:ul tag and sequence of :li tags
(if (seq children)
(let [sublist [first-list-open (map build-toc children
(repeat list-open)
(repeat :outer-list?)
(repeat false))]]
(if-let [li li] ; The root element has nothing so ignore it
(seq [li sublist]) ; Use seq to lazily concat the li with the sublists
sublist))
li))) ; Or just return the naked :li tag
(defn generate-toc
"Reads an HTML string and parses it for headers, then returns a list of links "Reads an HTML string and parses it for headers, then returns a list of links
to them. to them.
@ -53,14 +98,13 @@
:ol and true will result in an ordered list being generated for the table of :ol and true will result in an ordered list being generated for the table of
contents, while :ul will result in an unordered list. The default is an contents, while :ul will result in an unordered list. The default is an
ordered list." ordered list."
(let [li-tags (_list-types list-type) [^String html & {:keys [list-type] :or {list-type :ol}}]
top-tag (-> li-tags first (subs 1 3))]
(-> html (-> html
(.getBytes "UTF-8") (.getBytes "UTF-8")
(java.io.ByteArrayInputStream.) (java.io.ByteArrayInputStream.)
(html/parse) (html/parse)
:content :content
(get-headings) (get-headings)
(make-links li-tags) (build-toc-tree)
(clojure.string/replace-first (build-toc list-type)
(re-pattern top-tag) (str top-tag " class=\"contents\""))))) (hiccup/html)))

View file

@ -0,0 +1,95 @@
(ns cryogen-core.toc-test
(:require [clojure.test :refer :all]
[clojure.string :refer [join]]
[clojure.zip :as z]
[crouton.html :as html]
[hiccup.core :as hiccup]
[cryogen-core.toc :refer :all]))
; Reimport private functions
(def get-headings #'cryogen-core.toc/get-headings)
(def make-toc-entry #'cryogen-core.toc/make-toc-entry)
(def zip-toc-tree-to-insertion-point #'cryogen-core.toc/zip-toc-tree-to-insertion-point)
(def insert-toc-tree-entry #'cryogen-core.toc/insert-toc-tree-entry)
(def build-toc-tree #'cryogen-core.toc/build-toc-tree)
(def build-toc #'cryogen-core.toc/build-toc)
(defn parse-to-headings
[hiccup-seq]
(-> hiccup-seq hiccup/html html/parse-string :content get-headings))
; Test that the get-headings function properly filters non-headers
(deftest test-get-headings
(let [noisy-headers [:div [:h1 "First H1"]
[:p "Ignore..."]
[:h2 "First H2"]]]
(is (= (parse-to-headings noisy-headers)
[{:tag :h1 :attrs nil :content ["First H1"]}
{:tag :h2 :attrs nil :content ["First H2"]}]))))
; Test that the make-toc-entry ignores invalid input
(deftest test-make-toc-entry
(is (nil?
(make-toc-entry nil "Text")))
(is (nil?
(make-toc-entry "anchor" nil)))
(is (= [:li [:a {:href "#anchor"} "Text"]]
(make-toc-entry "anchor" "Text"))))
; Test that the built table of contents always treats later
; headers as being at the same level as earlier headers, even
; if the later headers are strictly greater in value.
; E.G.
; * h2
; * h3
; * h1
(deftest test-build-toc
(let [simplest-header [:div [:h2 [:a {:name "test"}] "Test"]]
no-headers [:div [:p "This is not a header"]]
closing-header-larger-than-opening-1
[:div [:h2 [:a {:name "starting_low"}]
"Starting Low"]
[:h1 [:a {:name "finishing_high"}]
"Finishing High"]]
closing-header-larger-than-opening-2
[:div [:h2 [:a {:name "starting_low"}]
"Starting Low"]
[:h4 [:a {:name "jumping_in"}]
"Jumping Right In"]
[:h3 [:a {:name "pulling_back"}]
"But then pull back"]
[:h2 [:a {:name "to_the_top"}]
"To the top"]]]
(is (= [:ol.content (seq [[:li [:a {:href "#test"} "Test"]]])]
(-> simplest-header parse-to-headings build-toc-tree
(build-toc :ol))))
(is (nil?
(-> no-headers parse-to-headings build-toc-tree
(build-toc :ol))))
(is (= [:ol.content (seq [[:li [:a {:href "#starting_low"} "Starting Low"]]
[:li [:a {:href "#finishing_high"} "Finishing High"]]])]
(-> closing-header-larger-than-opening-1
parse-to-headings
build-toc-tree
(build-toc :ol)))
"No outer header should be less indented than the first header tag.")
(is (= [:ul.content
(seq [
(seq [
[:li [:a {:href "#starting_low"} "Starting Low"]]
[:ul
(seq [
[:li [:a {:href "#jumping_in"} "Jumping Right In"]]
[:li [:a {:href "#pulling_back"} "But then pull back"]]
])
] ])
[:li [:a {:href "#to_the_top"} "To the top"]]
])]
(-> closing-header-larger-than-opening-2
parse-to-headings
build-toc-tree
(build-toc :ul)))
(join "" ["Inner headers can be more indented, "
"but outer headers cannot be less indented "
"than the original header."]))
))