Fix issue in URL resolution: return the input unchanged when resolve-urls? is false

This commit is contained in:
jem 2020-06-26 16:00:36 +02:00
parent 55514c2e41
commit dbc5f8cd25
5 changed files with 72 additions and 8 deletions

View file

@ -1,8 +1,17 @@
(ns mastodon-bot.infra (ns mastodon-bot.infra
(:require (:require
[cljs.reader :as edn] [cljs.reader :as edn]
[clojure.pprint :refer [pprint]]
["fs" :as fs])) ["fs" :as fs]))
(defn debug
  "Pretty-prints `item` to the current output and returns `item` unchanged."
  [item]
  ;; doto evaluates (pprint item) for its side effect and yields item itself.
  (doto item pprint))
(defn debug-first
  "Pretty-prints only the first element of `item` and returns the whole
  `item` unchanged."
  [item]
  (let [head (first item)]
    (pprint head)
    item))
(defn js->edn [data] (defn js->edn [data]
(js->clj data :keywordize-keys true)) (js->clj data :keywordize-keys true))

View file

@ -11,11 +11,21 @@
(s/def ::feeds (s/coll-of ::feed)) (s/def ::feeds (s/coll-of ::feed))
(def rss-source? (s/keys :req-un [::feeds])) (def rss-source? (s/keys :req-un [::feeds]))
;; Field specs for a single parsed RSS item; every field is a plain string.
(s/def ::title string?)
(s/def ::content string?)
(s/def ::link string?)
(s/def ::author string?)
(s/def ::isoDate string?)
(s/def ::pubDate string?)
;; A feed item is a map keyed by unqualified keywords: :title, :content and
;; :link are required, :author, :isoDate and :pubDate are optional.
;; NOTE(review): both date keys are optional here, while parse-feed builds its
;; :created-at from (or isoDate pubDate) - confirm that an item carrying
;; neither date is really meant to be accepted.
(s/def ::feed-item (s/keys :req-un [::title ::content ::link]
:opt-un [::author ::isoDate ::pubDate]))
(defn-spec rss-client any? (defn-spec rss-client any?
[] []
(rss.)) (rss.))
(defn parse-feed [item] (defn-spec parse-feed any?
[item ::feed-item]
(let [{:keys [title isoDate pubDate content link]} item] (let [{:keys [title isoDate pubDate content link]} item]
{:created-at (js/Date. (or isoDate pubDate)) {:created-at (js/Date. (or isoDate pubDate))
:text (str title :text (str title

View file

@ -82,8 +82,9 @@
(defn-spec intermediate-resolve-urls string? (defn-spec intermediate-resolve-urls string?
[resolve-urls? ::resolve-urls? [resolve-urls? ::resolve-urls?
input input?] input input?]
(when resolve-urls? (if resolve-urls?
(update input :text #(string/replace % shortened-url-pattern resolve-url)))) (update input :text #(string/replace % shortened-url-pattern resolve-url))
input))
(defn-spec content-filter-regexes ::content-filters (defn-spec content-filter-regexes ::content-filters
[transformation ::transformation] [transformation ::transformation]
@ -106,11 +107,6 @@
input input?] input input?]
(update input :text #(reduce-kv string/replace % (:replacements transformation)))) (update input :text #(reduce-kv string/replace % (:replacements transformation))))
;TODO: remove in final code
(defn debug[item]
(println item)
item)
; TODO: move this to mastodon-api - seems to belong strongly to mastodon ; TODO: move this to mastodon-api - seems to belong strongly to mastodon
(defn-spec intermediate-to-mastodon mastodon-output? (defn-spec intermediate-to-mastodon mastodon-output?
@ -218,6 +214,7 @@
(filter #(> (:created-at %) last-post-time)) (filter #(> (:created-at %) last-post-time))
(remove #(blocked-content? transformation (:text %))) (remove #(blocked-content? transformation (:text %)))
(map #(intermediate-resolve-urls resolve-urls? %)) (map #(intermediate-resolve-urls resolve-urls? %))
(infra/debug-first)
(map #(perform-replacements transformation %)) (map #(perform-replacements transformation %))
(map #(intermediate-to-mastodon mastodon-auth target %)) (map #(intermediate-to-mastodon mastodon-auth target %))
(masto/post-items mastodon-auth target))))) (masto/post-items mastodon-auth target)))))

View file

@ -9,3 +9,36 @@
(is (s/valid? sut/rss-source? (is (s/valid? sut/rss-source?
{:feeds [["correctiv-blog" "https://news.correctiv.org/news/rss.php"]]} {:feeds [["correctiv-blog" "https://news.correctiv.org/news/rss.php"]]}
))) )))
;; Fixture: one feed item whose :link points at a reddit.com r/Clojure post.
;; Carries both :pubDate and :isoDate (identical ISO-8601 strings) plus extra
;; keys (:contentSnippet, :id) that are not part of the feed-item key spec.
;; Consumed by items-should-be-parsed.
(def reddit-feed-item {:title "Datahike release 0.3.1"
:link
"https://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/"
:pubDate "2020-06-26T00:36:48.000Z"
:author "/u/yogthos"
:content
"&#32; submitted by &#32; <a href=\"https://www.reddit.com/user/yogthos\"> /u/yogthos </a> <br/> <span><a href=\"https://github.com/replikativ/datahike/releases/tag/v0.3.1\">[link]</a></span> &#32; <span><a href=\"https://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/\">[comments]</a></span>"
:contentSnippet "submitted by /u/yogthos [link] [comments]"
:id "t3_hfxotu"
:isoDate "2020-06-26T00:36:48.000Z"})
;; Fixture: one feed item whose :comments and :guid point at
;; news.ycombinator.com. Has no :author key (it uses :creator / :dc:creator),
;; and its :pubDate is an RFC-822 style string while :isoDate is ISO-8601.
;; Consumed by items-should-be-parsed.
(def hnrss-org-feed-item {:creator "seacaster"
:isoDate "2020-06-26T12:17:33.000Z"
:content
"\n<p>Article URL: <a href=\"https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/\">https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/</a></p>\n<p>Comments URL: <a href=\"https://news.ycombinator.com/item?id=23651117\">https://news.ycombinator.com/item?id=23651117</a></p>\n<p>Points: 1</p>\n<p># Comments: 0</p>\n"
:comments "https://news.ycombinator.com/item?id=23651117"
:dc:creator "seacaster"
:pubDate "Fri, 26 Jun 2020 12:17:33 +0000"
:contentSnippet
"Article URL: https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/\nComments URL: https://news.ycombinator.com/item?id=23651117\nPoints: 1\n# Comments: 0"
:title "Taking Theatre Online with WebGL and WebRTC"
:link
"https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"
:guid "https://news.ycombinator.com/item?id=23651117"})
(deftest items-should-be-parsed
  ;; parse-feed must reduce each raw feed item to a map holding only
  ;; :created-at (taken from the item's date) and :text (title, blank line, link).
  (let [expected-hnrss  {:created-at #inst "2020-06-26T12:17:33.000-00:00"
                         :text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"}
        expected-reddit {:created-at #inst "2020-06-26T00:36:48.000-00:00"
                         :text "Datahike release 0.3.1\n\nhttps://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/"}]
    (is (= expected-hnrss
           (sut/parse-feed hnrss-org-feed-item)))
    (is (= expected-reddit
           (sut/parse-feed reddit-feed-item)))))

View file

@ -0,0 +1,15 @@
(ns mastodon-bot.transform-rss-test
(:require
[cljs.test :refer-macros [deftest is testing run-tests]]
[clojure.spec.alpha :as s]
[mastodon-bot.transform :as sut]
))
;; Fixture: an item in the intermediate {:created-at, :text} shape that is
;; passed to intermediate-resolve-urls in should-resolve-urls.
(def intermediate-rss-item {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"})
(deftest should-resolve-urls
  ;; Only the resolve-urls? = false path is exercised here: the item must
  ;; come back with the same :created-at and :text it went in with.
  (let [expected {:created-at #inst "2020-06-26T12:17:33.000-00:00"
                  :text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"}
        actual   (sut/intermediate-resolve-urls false intermediate-rss-item)]
    (is (= expected actual))))