From 365df0e6806a432c82f9dd37d0f1eab8f4f7e16f Mon Sep 17 00:00:00 2001 From: Jeff Rabinowitz Date: Sun, 1 Nov 2015 01:59:54 -0400 Subject: [PATCH] Reimplemented TOC logic using zippers to ensure state is always internally valid. Also added unit testing to provide a regression testing suite. This should fix issue #19. --- src/cryogen_core/toc.clj | 114 +++++++++++++++++++++++---------- test/cryogen_core/toc_test.clj | 95 +++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 35 deletions(-) create mode 100644 test/cryogen_core/toc_test.clj diff --git a/src/cryogen_core/toc.clj b/src/cryogen_core/toc.clj index 9c20c22..e7bc348 100644 --- a/src/cryogen_core/toc.clj +++ b/src/cryogen_core/toc.clj @@ -1,10 +1,11 @@ (ns cryogen-core.toc - (:require [crouton.html :as html] + (:require [clojure.zip :as z] + [crouton.html :as html] [hiccup.core :as hiccup])) (def _h [:h1 :h2 :h3 :h4 :h5 :h6]) -(defn- compare_index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1))) +(defn- compare-index [i1 i2] (- (.indexOf ^clojure.lang.APersistentVector _h i2) (.indexOf ^clojure.lang.APersistentVector _h i1))) (defn- get-headings "Turn a body of html content into a vector of elements whose tags are @@ -19,33 +20,77 @@ headings))) [] content)) -(defn make-links - "Create a table of contents from the given headings. This function will look - for either: - (1) headings with a child anchor with a non-nil name attribute, e.g. -

Reference Title

- or - (2) headings with an id attribute, e.g.

Reference Title

- In both cases above, the anchor reference becomes \"#reference\" and the - anchor text is \"Reference Title\"." - [headings li-tags] - (let [[li-open li-close] li-tags] - (loop [items headings acc nil _last nil] +(defn- zip-toc-tree-to-insertion-point + "Given a toc-tree zipper and a header level, navigate + the zipper to the appropriate parent of the level for that header + to be inserted and return the zipper." + [toctree h-tag] + (if-let [current-tag (-> toctree first :value :tag)] + (let [direction (compare-index h-tag current-tag)] + (cond (zero? direction) (z/up toctree) ; Tag belongs at current level + (neg? direction) toctree ; Tag belongs below this level + (pos? direction) (recur (z/up toctree) h-tag))) ; Keep looking up + ; This level is the root list, return it + toctree)) + +(defn- insert-toc-tree-entry + "Inserts a toc-tree (zipper) entry for the given entry at the appropriate place. + Obeys the invariant that the toc-tree (zipper) is always moved to the inserted loc." + [tree entry] + (let [{htag :tag} entry + tree (zip-toc-tree-to-insertion-point tree htag)] + (-> tree (z/append-child {:children [] :value entry}) z/down z/rightmost))) + +(defn- build-toc-tree + "Given a sequence of header nodes, build a toc tree using zippers + and return it." + [headings] + (loop [zp (z/zipper + map? + :children + (fn [node children] (assoc node :children (apply vector children))) + {:value :root :children []}) + items headings] (if-let [{tag :tag {id :id} :attrs [{{name :name} :attrs} title :as htext] :content} (first items)] (let [anchor (or id name)] (if (nil? anchor) - (recur (rest items) acc nil) - (let [entry [:li [:a {:href (str "#" anchor)} (or title (first htext))]] - jump (compare_index _last tag)] - (cond (> jump 0) (recur (rest items) (str acc li-open (hiccup/html entry)) tag) - (= jump 0) (recur (rest items) (str acc (hiccup/html entry)) tag) - (< jump 0) (recur (rest items) (str acc (apply str (repeat (* -1 jump) li-close)) - (hiccup/html entry)) tag))))) - (str acc li-close))))) + (recur zp (rest items)) + (recur (insert-toc-tree-entry + zp {:tag tag :anchor anchor :text (or title (first htext))}) + (rest items)))) + (z/root zp)))) -(def _list-types {true ["
    " "
"] :ol ["
    " "
"] :ul [""]}) -(defn generate-toc [^String html & {list-type :list-type :or {list-type true}}] +(defn- make-toc-entry + "Given an anchor link and some text, construct a toc entry + consisting of link to the anchor using the given text, wrapped + in an
  • tag." + [anchor text] + (when (and anchor text) + [:li [:a {:href (str "#" anchor)} text]])) + + +(defn- build-toc + "Given the root of a toc tree and either :ol or :ul, + generate the table of contents and return it as a hiccup tree." + [toc-tree list-open & {:keys [outer-list?] :or {outer-list? true}}] + (let [{:keys [children], {:keys [anchor text]} :value} toc-tree + li (make-toc-entry anchor text) + first-list-open (if outer-list? + (keyword (str (name list-open) ".content")) + list-open)] + ; Create hiccup sequence of :ol/:ul tag and sequence of :li tags + (if (seq children) + (let [sublist [first-list-open (map build-toc children + (repeat list-open) + (repeat :outer-list?) + (repeat false))]] + (if-let [li li] ; The root element has nothing so ignore it + (seq [li sublist]) ; Use seq to lazily concat the li with the sublists + sublist)) + li))) ; Or just return the naked :li tag + +(defn generate-toc "Reads an HTML string and parses it for headers, then returns a list of links to them. @@ -53,14 +98,13 @@ :ol and true will result in an ordered list being generated for the table of contents, while :ul will result in an unordered list. The default is an ordered list." - (let [li-tags (_list-types list-type) - top-tag (-> li-tags first (subs 1 3))] - (-> html - (.getBytes "UTF-8") - (java.io.ByteArrayInputStream.) - (html/parse) - :content - (get-headings) - (make-links li-tags) - (clojure.string/replace-first - (re-pattern top-tag) (str top-tag " class=\"contents\""))))) + [^String html & {:keys [list-type] :or {list-type :ol}}] + (-> html + (.getBytes "UTF-8") + (java.io.ByteArrayInputStream.) + (html/parse) + :content + (get-headings) + (build-toc-tree) + (build-toc list-type) + (hiccup/html))) diff --git a/test/cryogen_core/toc_test.clj b/test/cryogen_core/toc_test.clj new file mode 100644 index 0000000..4554205 --- /dev/null +++ b/test/cryogen_core/toc_test.clj @@ -0,0 +1,95 @@ +(ns cryogen-core.toc-test + (:require [clojure.test :refer :all] + [clojure.string :refer [join]] + [clojure.zip :as z] + [crouton.html :as html] + [hiccup.core :as hiccup] + [cryogen-core.toc :refer :all])) + +; Reimport private functions +(def get-headings #'cryogen-core.toc/get-headings) +(def make-toc-entry #'cryogen-core.toc/make-toc-entry) +(def zip-toc-tree-to-insertion-point #'cryogen-core.toc/zip-toc-tree-to-insertion-point) +(def insert-toc-tree-entry #'cryogen-core.toc/insert-toc-tree-entry) +(def build-toc-tree #'cryogen-core.toc/build-toc-tree) +(def build-toc #'cryogen-core.toc/build-toc) + +(defn parse-to-headings + [hiccup-seq] + (-> hiccup-seq hiccup/html html/parse-string :content get-headings)) + +; Test that the get-headings function properly filters non-headers +(deftest test-get-headings + (let [noisy-headers [:div [:h1 "First H1"] + [:p "Ignore..."] + [:h2 "First H2"]]] + (is (= (parse-to-headings noisy-headers) + [{:tag :h1 :attrs nil :content ["First H1"]} + {:tag :h2 :attrs nil :content ["First H2"]}])))) + +; Test that the make-toc-entry ignores invalid input +(deftest test-make-toc-entry + (is (nil? + (make-toc-entry nil "Text"))) + (is (nil? + (make-toc-entry "anchor" nil))) + (is (= [:li [:a {:href "#anchor"} "Text"]] + (make-toc-entry "anchor" "Text")))) + +; Test that the built table of contents always treats later +; headers as being at the same level as earlier headers, even +; if the later headers are strictly greater in value. +; E.G. +; * h2 +; * h3 +; * h1 +(deftest test-build-toc + (let [simplest-header [:div [:h2 [:a {:name "test"}] "Test"]] + no-headers [:div [:p "This is not a header"]] + closing-header-larger-than-opening-1 + [:div [:h2 [:a {:name "starting_low"}] + "Starting Low"] + [:h1 [:a {:name "finishing_high"}] + "Finishing High"]] + closing-header-larger-than-opening-2 + [:div [:h2 [:a {:name "starting_low"}] + "Starting Low"] + [:h4 [:a {:name "jumping_in"}] + "Jumping Right In"] + [:h3 [:a {:name "pulling_back"}] + "But then pull back"] + [:h2 [:a {:name "to_the_top"}] + "To the top"]]] + (is (= [:ol.content (seq [[:li [:a {:href "#test"} "Test"]]])] + (-> simplest-header parse-to-headings build-toc-tree + (build-toc :ol)))) + (is (nil? + (-> no-headers parse-to-headings build-toc-tree + (build-toc :ol)))) + (is (= [:ol.content (seq [[:li [:a {:href "#starting_low"} "Starting Low"]] + [:li [:a {:href "#finishing_high"} "Finishing High"]]])] + (-> closing-header-larger-than-opening-1 + parse-to-headings + build-toc-tree + (build-toc :ol))) + "No outer header should be less indented than the first header tag.") + (is (= [:ul.content + (seq [ + (seq [ + [:li [:a {:href "#starting_low"} "Starting Low"]] + [:ul + (seq [ + [:li [:a {:href "#jumping_in"} "Jumping Right In"]] + [:li [:a {:href "#pulling_back"} "But then pull back"]] + ]) + ] ]) + [:li [:a {:href "#to_the_top"} "To the top"]] + ])] + (-> closing-header-larger-than-opening-2 + parse-to-headings + build-toc-tree + (build-toc :ul))) + (join "" ["Inner headers can be more indented, " + "but outer headers cannot be less indented " + "than the original header."])) + ))