From cab5e24845df21421d187c96c1688e94ab4cd0b5 Mon Sep 17 00:00:00 2001 From: jem Date: Sat, 30 May 2020 14:49:47 +0200 Subject: [PATCH] mv filters to transform --- README.md | 17 ++++---- src/main/mastodon_bot/mastodon_api.cljs | 26 +----------- src/main/mastodon_bot/transform.cljs | 51 ++++++++++++++++------- src/test/mastodon_bot/transform_test.cljs | 6 ++- 4 files changed, 48 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 634deeb..5bd038f 100644 --- a/README.md +++ b/README.md @@ -50,15 +50,6 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w ;; e.g: https://mastodon.social/web/accounts/294795 :account-id "XXXX" :api_url "https://botsin.space/api/v1/" - ;; optionally try to resolve URLs in posts to skip URL shorteners - ;; defaults to false - :resolve-urls? true - ;; optional content filter regexes - ;; any posts matching the regexes will be filtered out - :content-filters [".*bannedsite.*"] - ;; optional keyword filter regexes - ;; any posts not matching the regexes will be filtered out - :keyword-filters [".*clojure.*"] } :transform [{:source {:type :twitter-source ;; optional, defaults to false @@ -87,7 +78,13 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w :signature "#newsbot"} ;; optionally try to resolve URLs in posts to skip URL shorteners ;; defaults to false - :resolve-urls? true}] + :resolve-urls? true + ;; optional content filter regexes + ;; any posts matching the regexes will be filtered out + :content-filters [".*bannedsite.*"] + ;; optional keyword filter regexes + ;; any posts not matching the regexes will be filtered out + :keyword-filters [".*clojure.*"]}] } ``` diff --git a/src/main/mastodon_bot/mastodon_api.cljs b/src/main/mastodon_bot/mastodon_api.cljs index a5064ba..6deed6e 100755 --- a/src/main/mastodon_bot/mastodon_api.cljs +++ b/src/main/mastodon_bot/mastodon_api.cljs @@ -11,8 +11,6 @@ (s/def ::access_token string?) (s/def ::api_url string?) (s/def ::account-id string?) -(s/def ::content-filter string?) -(s/def ::keyword-filter string?) (s/def ::append-screen-name? boolean?) (s/def ::signature string?) (s/def ::sensitive? boolean?) @@ -24,12 +22,9 @@ (<= n 500) (> n 0)))) -(s/def ::content-filters (s/* ::content-filter)) -(s/def ::keyword-filters (s/* ::keyword-filter)) + (def mastodon-auth? (s/keys :req-un [::account-id ::access_token ::api_url])) -(def mastodon-target? (s/keys :opt-un [ - ;::content-filters ::keyword-filters - ::max-post-length +(def mastodon-target? (s/keys :opt-un [::max-post-length ::signature ::visibility ::append-screen-name? @@ -40,14 +35,6 @@ (def mastodon-config? (s/merge mastodon-auth? mastodon-target?)) -(defn-spec content-filter-regexes ::content-filters - [mastodon-config mastodon-config?] - (mapv re-pattern (:content-filters mastodon-config))) - -(defn-spec keyword-filter-regexes ::keyword-filters - [mastodon-config mastodon-config?] - (mapv re-pattern (:keyword-filters mastodon-config))) - (defn-spec max-post-length ::max-post-length [target mastodon-target?] (:max-post-length target)) @@ -64,14 +51,6 @@ mastodon.) (infra/exit-with-error "missing Mastodon auth configuration!"))) -(defn-spec blocked-content? boolean? - [mastodon-config mastodon-config? - text string?] - (boolean - (or (some #(re-find % text) (content-filter-regexes mastodon-config)) - (when (not-empty (keyword-filter-regexes mastodon-config)) - (empty? (some #(re-find % text) (keyword-filter-regexes mastodon-config))))))) - (defn-spec delete-status any? [mastodon-config mastodon-config? status-id string?] @@ -129,7 +108,6 @@ items any?] (doseq [{:keys [text media-links]} (->> items - (remove #(blocked-content? mastodon-auth (:text %))) (filter #(> (:created-at %) last-post-time)))] (if media-links (post-status-with-images mastodon-auth target text media-links) diff --git a/src/main/mastodon_bot/transform.cljs b/src/main/mastodon_bot/transform.cljs index e7b7dc6..8cff295 100644 --- a/src/main/mastodon_bot/transform.cljs +++ b/src/main/mastodon_bot/transform.cljs @@ -22,6 +22,10 @@ :opt-un [::media-links])) (s/def ::type keyword?) (s/def ::resolve-urls? boolean?) +(s/def ::content-filter string?) +(s/def ::content-filters (s/* ::content-filter)) +(s/def ::keyword-filter string?) +(s/def ::keyword-filters (s/* ::keyword-filter)) (defmulti source-type :type) (defmethod source-type :twitter-source [_] (s/merge (s/keys :req-un[::type]) twitter/twitter-source?)) @@ -32,7 +36,7 @@ (s/def ::target (s/multi-spec target-type ::type)) (s/def ::transformation (s/keys :req-un [::source ::target] - :opt-un [::resolve-urls?])) + :opt-un [::resolve-urls? ::content-filters ::keyword-filters])) (def transformations? (s/* ::transformation)) (defn trim-text [text max-post-length] @@ -73,6 +77,23 @@ (when resolve-urls? (update input :text #(string/replace % shortened-url-pattern resolve-url)))) +(defn-spec content-filter-regexes ::content-filters + [transformation ::transformation] + (mapv re-pattern (:content-filters transformation))) + +(defn-spec keyword-filter-regexes ::keyword-filters + [transformation ::transformation] + (mapv re-pattern (:keyword-filters transformation))) + +(defn-spec blocked-content? boolean? + [transformation ::transformation + text string?] + (boolean + (or (some #(re-find % text) (content-filter-regexes transformation)) + (when (not-empty (keyword-filter-regexes transformation)) + (empty? (some #(re-find % text) (keyword-filter-regexes transformation))))))) + + ; TODO: move this to mastodon-api - seems to belong strongly to mastodon (defn-spec intermediate-to-mastodon mastodon-output? [mastodon-auth masto/mastodon-auth? @@ -100,19 +121,19 @@ (defn-spec post-tweets-to-mastodon any? [mastodon-auth masto/mastodon-auth? - resolve-urls? ::resolve-urls? - source twitter/twitter-source? - target masto/mastodon-target? + transformation ::transformation last-post-time any?] - (fn [error tweets response] - (if error - (infra/exit-with-error error) - (->> (infra/js->edn tweets) - (map twitter/parse-tweet) - (map #(intermediate-resolve-urls resolve-urls? %)) - (map #(twitter/nitter-url source %)) - (map #(intermediate-to-mastodon mastodon-auth target %)) - (masto/post-items mastodon-auth target last-post-time))))) + (let [{:keys [source target resolve-urls?]} transformation] + (fn [error tweets response] + (if error + (infra/exit-with-error error) + (->> (infra/js->edn tweets) + (map twitter/parse-tweet) + (remove #(blocked-content? transformation (:text %))) + (map #(intermediate-resolve-urls resolve-urls? %)) + (map #(twitter/nitter-url source %)) + (map #(intermediate-to-mastodon mastodon-auth target %)) + (masto/post-items mastodon-auth target last-post-time)))))) (defn-spec tweets-to-mastodon any? [mastodon-auth masto/mastodon-auth? @@ -127,7 +148,5 @@ account (post-tweets-to-mastodon mastodon-auth - resolve-urls? - source - target + transformation last-post-time))))) diff --git a/src/test/mastodon_bot/transform_test.cljs b/src/test/mastodon_bot/transform_test.cljs index f57dc0e..7244982 100755 --- a/src/test/mastodon_bot/transform_test.cljs +++ b/src/test/mastodon_bot/transform_test.cljs @@ -13,7 +13,7 @@ :include-replies? false :include-rts? true :nitter-urls? true - :accounts ["an-twitter-account"]} + :accounts ["an-twitter-account"]} :target {:type :mastodon-target :append-screen-name? true :media-only? false @@ -21,4 +21,6 @@ :visibility "unlisted" :sensitive? true :signature "my-bot"} - :resolve-urls? true}]))) + :resolve-urls? true + :content-filters [".*bannedsite.*"] + :keyword-filters [".*"]}])))