Move content and keyword filters from the Mastodon config to the transform config

This commit is contained in:
jem 2020-05-30 14:49:47 +02:00
parent 8719e7fd15
commit cab5e24845
4 changed files with 48 additions and 52 deletions

View file

@ -50,15 +50,6 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w
;; e.g: https://mastodon.social/web/accounts/294795 ;; e.g: https://mastodon.social/web/accounts/294795
:account-id "XXXX" :account-id "XXXX"
:api_url "https://botsin.space/api/v1/" :api_url "https://botsin.space/api/v1/"
;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false
:resolve-urls? true
;; optional content filter regexes
;; any posts matching the regexes will be filtered out
:content-filters [".*bannedsite.*"]
;; optional keyword filter regexes
;; any posts not matching the regexes will be filtered out
:keyword-filters [".*clojure.*"]
} }
:transform [{:source {:type :twitter-source :transform [{:source {:type :twitter-source
;; optional, defaults to false ;; optional, defaults to false
@ -87,7 +78,13 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w
:signature "#newsbot"} :signature "#newsbot"}
;; optionally try to resolve URLs in posts to skip URL shorteners ;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false ;; defaults to false
:resolve-urls? true}] :resolve-urls? true
;; optional content filter regexes
;; any post matching at least one of these regexes will be filtered out
:content-filters [".*bannedsite.*"]
;; optional keyword filter regexes
;; if given, any post that matches none of these regexes will be filtered out
:keyword-filters [".*clojure.*"]}]
} }
``` ```

View file

@ -11,8 +11,6 @@
(s/def ::access_token string?) (s/def ::access_token string?)
(s/def ::api_url string?) (s/def ::api_url string?)
(s/def ::account-id string?) (s/def ::account-id string?)
(s/def ::content-filter string?)
(s/def ::keyword-filter string?)
(s/def ::append-screen-name? boolean?) (s/def ::append-screen-name? boolean?)
(s/def ::signature string?) (s/def ::signature string?)
(s/def ::sensitive? boolean?) (s/def ::sensitive? boolean?)
@ -24,12 +22,9 @@
(<= n 500) (<= n 500)
(> n 0)))) (> n 0))))
(s/def ::content-filters (s/* ::content-filter))
(s/def ::keyword-filters (s/* ::keyword-filter))
(def mastodon-auth? (s/keys :req-un [::account-id ::access_token ::api_url])) (def mastodon-auth? (s/keys :req-un [::account-id ::access_token ::api_url]))
(def mastodon-target? (s/keys :opt-un [ (def mastodon-target? (s/keys :opt-un [::max-post-length
;::content-filters ::keyword-filters
::max-post-length
::signature ::signature
::visibility ::visibility
::append-screen-name? ::append-screen-name?
@ -40,14 +35,6 @@
(def mastodon-config? (s/merge mastodon-auth? mastodon-target?)) (def mastodon-config? (s/merge mastodon-auth? mastodon-target?))
(defn-spec content-filter-regexes ::content-filters
[mastodon-config mastodon-config?]
(mapv re-pattern (:content-filters mastodon-config)))
(defn-spec keyword-filter-regexes ::keyword-filters
[mastodon-config mastodon-config?]
(mapv re-pattern (:keyword-filters mastodon-config)))
(defn-spec max-post-length ::max-post-length (defn-spec max-post-length ::max-post-length
[target mastodon-target?] [target mastodon-target?]
(:max-post-length target)) (:max-post-length target))
@ -64,14 +51,6 @@
mastodon.) mastodon.)
(infra/exit-with-error "missing Mastodon auth configuration!"))) (infra/exit-with-error "missing Mastodon auth configuration!")))
(defn-spec blocked-content? boolean?
[mastodon-config mastodon-config?
text string?]
(boolean
(or (some #(re-find % text) (content-filter-regexes mastodon-config))
(when (not-empty (keyword-filter-regexes mastodon-config))
(empty? (some #(re-find % text) (keyword-filter-regexes mastodon-config)))))))
(defn-spec delete-status any? (defn-spec delete-status any?
[mastodon-config mastodon-config? [mastodon-config mastodon-config?
status-id string?] status-id string?]
@ -129,7 +108,6 @@
items any?] items any?]
(doseq [{:keys [text media-links]} (doseq [{:keys [text media-links]}
(->> items (->> items
(remove #(blocked-content? mastodon-auth (:text %)))
(filter #(> (:created-at %) last-post-time)))] (filter #(> (:created-at %) last-post-time)))]
(if media-links (if media-links
(post-status-with-images mastodon-auth target text media-links) (post-status-with-images mastodon-auth target text media-links)

View file

@ -22,6 +22,10 @@
:opt-un [::media-links])) :opt-un [::media-links]))
(s/def ::type keyword?) (s/def ::type keyword?)
(s/def ::resolve-urls? boolean?) (s/def ::resolve-urls? boolean?)
;; A single content filter: a string that is later compiled with re-pattern.
(s/def ::content-filter string?)
;; Zero or more content filter strings.
(s/def ::content-filters (s/* ::content-filter))
;; A single keyword filter: a string that is later compiled with re-pattern.
(s/def ::keyword-filter string?)
;; Zero or more keyword filter strings.
(s/def ::keyword-filters (s/* ::keyword-filter))
(defmulti source-type :type) (defmulti source-type :type)
(defmethod source-type :twitter-source [_] (defmethod source-type :twitter-source [_]
(s/merge (s/keys :req-un[::type]) twitter/twitter-source?)) (s/merge (s/keys :req-un[::type]) twitter/twitter-source?))
@ -32,7 +36,7 @@
(s/def ::target (s/multi-spec target-type ::type)) (s/def ::target (s/multi-spec target-type ::type))
(s/def ::transformation (s/keys :req-un [::source ::target] (s/def ::transformation (s/keys :req-un [::source ::target]
:opt-un [::resolve-urls?])) :opt-un [::resolve-urls? ::content-filters ::keyword-filters]))
(def transformations? (s/* ::transformation)) (def transformations? (s/* ::transformation))
(defn trim-text [text max-post-length] (defn trim-text [text max-post-length]
@ -73,6 +77,23 @@
(when resolve-urls? (when resolve-urls?
(update input :text #(string/replace % shortened-url-pattern resolve-url)))) (update input :text #(string/replace % shortened-url-pattern resolve-url))))
;; Compiles the :content-filters strings of a transformation into regexes.
;; Returns a (possibly empty) vector of compiled patterns.
;; The return spec describes compiled regexes: the ::content-filters spec only
;; describes the configured strings, so using it here would reject every
;; non-empty result once the function is instrumented.
(defn-spec content-filter-regexes (s/coll-of regexp? :kind vector?)
  [transformation ::transformation]
  (mapv re-pattern (:content-filters transformation)))
;; Compiles the :keyword-filters strings of a transformation into regexes.
;; Returns a (possibly empty) vector of compiled patterns.
;; The return spec describes compiled regexes: the ::keyword-filters spec only
;; describes the configured strings, so using it here would reject every
;; non-empty result once the function is instrumented.
(defn-spec keyword-filter-regexes (s/coll-of regexp? :kind vector?)
  [transformation ::transformation]
  (mapv re-pattern (:keyword-filters transformation)))
;; Decides whether a post text must be dropped for a transformation.
;; A text is blocked when
;;   - at least one content filter regex matches it, or
;;   - keyword filters are configured and none of them matches it.
;; Returns true when the text is blocked, false otherwise.
(defn-spec blocked-content? boolean?
  [transformation ::transformation
   text string?]
  (let [;; re-find returns nil only when there is no match; a successful
        ;; zero-length match yields "" and must still count as a match
        ;; (e.g. ".*" against a text that starts with a newline).
        matches? (fn [regex] (some? (re-find regex text)))
        ;; compile each filter list once per call
        content-regexes (content-filter-regexes transformation)
        keyword-regexes (keyword-filter-regexes transformation)]
    (boolean
     (or (some matches? content-regexes)
         (and (seq keyword-regexes)
              (not-any? matches? keyword-regexes))))))
; TODO: move this to mastodon-api - seems to belong strongly to mastodon ; TODO: move this to mastodon-api - seems to belong strongly to mastodon
(defn-spec intermediate-to-mastodon mastodon-output? (defn-spec intermediate-to-mastodon mastodon-output?
[mastodon-auth masto/mastodon-auth? [mastodon-auth masto/mastodon-auth?
@ -100,19 +121,19 @@
(defn-spec post-tweets-to-mastodon any? (defn-spec post-tweets-to-mastodon any?
[mastodon-auth masto/mastodon-auth? [mastodon-auth masto/mastodon-auth?
resolve-urls? ::resolve-urls? transformation ::transformation
source twitter/twitter-source?
target masto/mastodon-target?
last-post-time any?] last-post-time any?]
(fn [error tweets response] (let [{:keys [source target resolve-urls?]} transformation]
(if error (fn [error tweets response]
(infra/exit-with-error error) (if error
(->> (infra/js->edn tweets) (infra/exit-with-error error)
(map twitter/parse-tweet) (->> (infra/js->edn tweets)
(map #(intermediate-resolve-urls resolve-urls? %)) (map twitter/parse-tweet)
(map #(twitter/nitter-url source %)) (remove #(blocked-content? transformation (:text %)))
(map #(intermediate-to-mastodon mastodon-auth target %)) (map #(intermediate-resolve-urls resolve-urls? %))
(masto/post-items mastodon-auth target last-post-time))))) (map #(twitter/nitter-url source %))
(map #(intermediate-to-mastodon mastodon-auth target %))
(masto/post-items mastodon-auth target last-post-time))))))
(defn-spec tweets-to-mastodon any? (defn-spec tweets-to-mastodon any?
[mastodon-auth masto/mastodon-auth? [mastodon-auth masto/mastodon-auth?
@ -127,7 +148,5 @@
account account
(post-tweets-to-mastodon (post-tweets-to-mastodon
mastodon-auth mastodon-auth
resolve-urls? transformation
source
target
last-post-time))))) last-post-time)))))

View file

@ -21,4 +21,6 @@
:visibility "unlisted" :visibility "unlisted"
:sensitive? true :sensitive? true
:signature "my-bot"} :signature "my-bot"}
:resolve-urls? true}]))) :resolve-urls? true
:content-filters [".*bannedsite.*"]
:keyword-filters [".*"]}])))