Fix issue in URL resolution
This commit is contained in:
parent
55514c2e41
commit
dbc5f8cd25
5 changed files with 72 additions and 8 deletions
|
@ -1,8 +1,17 @@
|
||||||
(ns mastodon-bot.infra
|
(ns mastodon-bot.infra
|
||||||
(:require
|
(:require
|
||||||
[cljs.reader :as edn]
|
[cljs.reader :as edn]
|
||||||
|
[clojure.pprint :refer [pprint]]
|
||||||
["fs" :as fs]))
|
["fs" :as fs]))
|
||||||
|
|
||||||
|
;; Debugging aid: pretty-prints `item` with pprint, then returns `item`
;; unchanged.
(defn debug [item]
|
||||||
|
(pprint item)
|
||||||
|
item)
|
||||||
|
|
||||||
|
;; Debugging aid: pretty-prints only `(first item)`, then returns the whole
;; `item` unchanged.
(defn debug-first [item]
|
||||||
|
(pprint (first item))
|
||||||
|
item)
|
||||||
|
|
||||||
(defn js->edn [data]
|
(defn js->edn [data]
|
||||||
(js->clj data :keywordize-keys true))
|
(js->clj data :keywordize-keys true))
|
||||||
|
|
||||||
|
|
|
@ -11,11 +11,21 @@
|
||||||
(s/def ::feeds (s/coll-of ::feed))
|
(s/def ::feeds (s/coll-of ::feed))
|
||||||
(def rss-source? (s/keys :req-un [::feeds]))
|
(def rss-source? (s/keys :req-un [::feeds]))
|
||||||
|
|
||||||
|
;; Specs for a single feed item. Each field is a string; ::feed-item requires
;; :title, :content and :link and allows :author, :isoDate and :pubDate
;; as optional (unqualified) keys.
(s/def ::title string?)
|
||||||
|
(s/def ::content string?)
|
||||||
|
(s/def ::link string?)
|
||||||
|
(s/def ::author string?)
|
||||||
|
(s/def ::isoDate string?)
|
||||||
|
(s/def ::pubDate string?)
|
||||||
|
(s/def ::feed-item (s/keys :req-un [::title ::content ::link]
|
||||||
|
:opt-un [::author ::isoDate ::pubDate]))
|
||||||
|
|
||||||
(defn-spec rss-client any?
|
(defn-spec rss-client any?
|
||||||
[]
|
[]
|
||||||
(rss.))
|
(rss.))
|
||||||
|
|
||||||
(defn parse-feed [item]
|
(defn-spec parse-feed any?
|
||||||
|
[item ::feed-item]
|
||||||
(let [{:keys [title isoDate pubDate content link]} item]
|
(let [{:keys [title isoDate pubDate content link]} item]
|
||||||
{:created-at (js/Date. (or isoDate pubDate))
|
{:created-at (js/Date. (or isoDate pubDate))
|
||||||
:text (str title
|
:text (str title
|
||||||
|
|
|
@ -82,8 +82,9 @@
|
||||||
(defn-spec intermediate-resolve-urls string?
|
(defn-spec intermediate-resolve-urls string?
|
||||||
[resolve-urls? ::resolve-urls?
|
[resolve-urls? ::resolve-urls?
|
||||||
input input?]
|
input input?]
|
||||||
(when resolve-urls?
|
(if resolve-urls?
|
||||||
(update input :text #(string/replace % shortened-url-pattern resolve-url))))
|
(update input :text #(string/replace % shortened-url-pattern resolve-url))
|
||||||
|
input))
|
||||||
|
|
||||||
(defn-spec content-filter-regexes ::content-filters
|
(defn-spec content-filter-regexes ::content-filters
|
||||||
[transformation ::transformation]
|
[transformation ::transformation]
|
||||||
|
@ -106,11 +107,6 @@
|
||||||
input input?]
|
input input?]
|
||||||
(update input :text #(reduce-kv string/replace % (:replacements transformation))))
|
(update input :text #(reduce-kv string/replace % (:replacements transformation))))
|
||||||
|
|
||||||
;TODO: remove in final code
|
|
||||||
(defn debug[item]
|
|
||||||
(println item)
|
|
||||||
item)
|
|
||||||
|
|
||||||
|
|
||||||
; TODO: move this to mastodon-api - seems to belong strongly to mastodon
|
; TODO: move this to mastodon-api - seems to belong strongly to mastodon
|
||||||
(defn-spec intermediate-to-mastodon mastodon-output?
|
(defn-spec intermediate-to-mastodon mastodon-output?
|
||||||
|
@ -218,6 +214,7 @@
|
||||||
(filter #(> (:created-at %) last-post-time))
|
(filter #(> (:created-at %) last-post-time))
|
||||||
(remove #(blocked-content? transformation (:text %)))
|
(remove #(blocked-content? transformation (:text %)))
|
||||||
(map #(intermediate-resolve-urls resolve-urls? %))
|
(map #(intermediate-resolve-urls resolve-urls? %))
|
||||||
|
(infra/debug-first)
|
||||||
(map #(perform-replacements transformation %))
|
(map #(perform-replacements transformation %))
|
||||||
(map #(intermediate-to-mastodon mastodon-auth target %))
|
(map #(intermediate-to-mastodon mastodon-auth target %))
|
||||||
(masto/post-items mastodon-auth target)))))
|
(masto/post-items mastodon-auth target)))))
|
||||||
|
|
|
@ -9,3 +9,36 @@
|
||||||
(is (s/valid? sut/rss-source?
|
(is (s/valid? sut/rss-source?
|
||||||
{:feeds [["correctiv-blog" "https://news.correctiv.org/news/rss.php"]]}
|
{:feeds [["correctiv-blog" "https://news.correctiv.org/news/rss.php"]]}
|
||||||
)))
|
)))
|
||||||
|
|
||||||
|
;; Test fixture: a feed item map (presumably captured from a reddit feed,
;; judging by its name and links). Passed to sut/parse-feed in
;; items-should-be-parsed.
(def reddit-feed-item {:title "Datahike release 0.3.1"
|
||||||
|
:link
|
||||||
|
"https://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/"
|
||||||
|
:pubDate "2020-06-26T00:36:48.000Z"
|
||||||
|
:author "/u/yogthos"
|
||||||
|
:content
|
||||||
|
"  submitted by   <a href=\"https://www.reddit.com/user/yogthos\"> /u/yogthos </a> <br/> <span><a href=\"https://github.com/replikativ/datahike/releases/tag/v0.3.1\">[link]</a></span>   <span><a href=\"https://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/\">[comments]</a></span>"
|
||||||
|
:contentSnippet "submitted by /u/yogthos [link] [comments]"
|
||||||
|
:id "t3_hfxotu"
|
||||||
|
:isoDate "2020-06-26T00:36:48.000Z"})
|
||||||
|
|
||||||
|
;; Test fixture: a feed item map (presumably captured from hnrss.org, judging
;; by its name). Its :pubDate uses a different date format than its :isoDate.
;; Passed to sut/parse-feed in items-should-be-parsed.
(def hnrss-org-feed-item {:creator "seacaster"
|
||||||
|
:isoDate "2020-06-26T12:17:33.000Z"
|
||||||
|
:content
|
||||||
|
"\n<p>Article URL: <a href=\"https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/\">https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/</a></p>\n<p>Comments URL: <a href=\"https://news.ycombinator.com/item?id=23651117\">https://news.ycombinator.com/item?id=23651117</a></p>\n<p>Points: 1</p>\n<p># Comments: 0</p>\n"
|
||||||
|
:comments "https://news.ycombinator.com/item?id=23651117"
|
||||||
|
:dc:creator "seacaster"
|
||||||
|
:pubDate "Fri, 26 Jun 2020 12:17:33 +0000"
|
||||||
|
:contentSnippet
|
||||||
|
"Article URL: https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/\nComments URL: https://news.ycombinator.com/item?id=23651117\nPoints: 1\n# Comments: 0"
|
||||||
|
:title "Taking Theatre Online with WebGL and WebRTC"
|
||||||
|
:link
|
||||||
|
"https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"
|
||||||
|
:guid "https://news.ycombinator.com/item?id=23651117"})
|
||||||
|
|
||||||
|
;; Checks sut/parse-feed against both fixtures: the result is expected to be a
;; map with :created-at (matching the fixture's :isoDate instant) and :text,
;; where :text is the title, a blank line, then the link.
(deftest items-should-be-parsed
|
||||||
|
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
|
||||||
|
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"}
|
||||||
|
(sut/parse-feed hnrss-org-feed-item)))
|
||||||
|
(is (= {:created-at #inst "2020-06-26T00:36:48.000-00:00",
|
||||||
|
:text "Datahike release 0.3.1\n\nhttps://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/"}
|
||||||
|
(sut/parse-feed reddit-feed-item))))
|
||||||
|
|
15
src/test/mastodon_bot/transform_rss_test.cljs
Executable file
15
src/test/mastodon_bot/transform_rss_test.cljs
Executable file
|
@ -0,0 +1,15 @@
|
||||||
|
(ns mastodon-bot.transform-rss-test
|
||||||
|
(:require
|
||||||
|
[cljs.test :refer-macros [deftest is testing run-tests]]
|
||||||
|
[clojure.spec.alpha :as s]
|
||||||
|
[mastodon-bot.transform :as sut]
|
||||||
|
))
|
||||||
|
|
||||||
|
;; Test fixture: an item map with :created-at and :text, used as the input to
;; sut/intermediate-resolve-urls in should-resolve-urls.
(def intermediate-rss-item {:created-at #inst "2020-06-26T12:17:33.000-00:00"
|
||||||
|
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"})
|
||||||
|
|
||||||
|
;; With resolve-urls? set to false, intermediate-resolve-urls is expected to
;; return the item unchanged. Only the disabled case is covered here.
(deftest should-resolve-urls
|
||||||
|
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
|
||||||
|
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"}
|
||||||
|
(sut/intermediate-resolve-urls false intermediate-rss-item)))
|
||||||
|
)
|
Reference in a new issue