mv filters to transform
This commit is contained in:
parent
8719e7fd15
commit
cab5e24845
4 changed files with 48 additions and 52 deletions
17
README.md
17
README.md
|
@ -50,15 +50,6 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w
|
||||||
;; e.g: https://mastodon.social/web/accounts/294795
|
;; e.g: https://mastodon.social/web/accounts/294795
|
||||||
:account-id "XXXX"
|
:account-id "XXXX"
|
||||||
:api_url "https://botsin.space/api/v1/"
|
:api_url "https://botsin.space/api/v1/"
|
||||||
;; optionally try to resolve URLs in posts to skip URL shorteners
|
|
||||||
;; defaults to false
|
|
||||||
:resolve-urls? true
|
|
||||||
;; optional content filter regexes
|
|
||||||
;; any posts matching the regexes will be filtered out
|
|
||||||
:content-filters [".*bannedsite.*"]
|
|
||||||
;; optional keyword filter regexes
|
|
||||||
;; any posts not matching the regexes will be filtered out
|
|
||||||
:keyword-filters [".*clojure.*"]
|
|
||||||
}
|
}
|
||||||
:transform [{:source {:type :twitter-source
|
:transform [{:source {:type :twitter-source
|
||||||
;; optional, defaults to false
|
;; optional, defaults to false
|
||||||
|
@ -87,7 +78,13 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w
|
||||||
:signature "#newsbot"}
|
:signature "#newsbot"}
|
||||||
;; optionally try to resolve URLs in posts to skip URL shorteners
|
;; optionally try to resolve URLs in posts to skip URL shorteners
|
||||||
;; defaults to false
|
;; defaults to false
|
||||||
:resolve-urls? true}]
|
:resolve-urls? true
|
||||||
|
;; optional content filter regexes
|
||||||
|
;; any posts matching the regexes will be filtered out
|
||||||
|
:content-filters [".*bannedsite.*"]
|
||||||
|
;; optional keyword filter regexes
|
||||||
|
;; any posts not matching the regexes will be filtered out
|
||||||
|
:keyword-filters [".*clojure.*"]}]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -11,8 +11,6 @@
|
||||||
(s/def ::access_token string?)
|
(s/def ::access_token string?)
|
||||||
(s/def ::api_url string?)
|
(s/def ::api_url string?)
|
||||||
(s/def ::account-id string?)
|
(s/def ::account-id string?)
|
||||||
(s/def ::content-filter string?)
|
|
||||||
(s/def ::keyword-filter string?)
|
|
||||||
(s/def ::append-screen-name? boolean?)
|
(s/def ::append-screen-name? boolean?)
|
||||||
(s/def ::signature string?)
|
(s/def ::signature string?)
|
||||||
(s/def ::sensitive? boolean?)
|
(s/def ::sensitive? boolean?)
|
||||||
|
@ -24,12 +22,9 @@
|
||||||
(<= n 500)
|
(<= n 500)
|
||||||
(> n 0))))
|
(> n 0))))
|
||||||
|
|
||||||
(s/def ::content-filters (s/* ::content-filter))
|
|
||||||
(s/def ::keyword-filters (s/* ::keyword-filter))
|
|
||||||
(def mastodon-auth? (s/keys :req-un [::account-id ::access_token ::api_url]))
|
(def mastodon-auth? (s/keys :req-un [::account-id ::access_token ::api_url]))
|
||||||
(def mastodon-target? (s/keys :opt-un [
|
(def mastodon-target? (s/keys :opt-un [::max-post-length
|
||||||
;::content-filters ::keyword-filters
|
|
||||||
::max-post-length
|
|
||||||
::signature
|
::signature
|
||||||
::visibility
|
::visibility
|
||||||
::append-screen-name?
|
::append-screen-name?
|
||||||
|
@ -40,14 +35,6 @@
|
||||||
(def mastodon-config? (s/merge mastodon-auth? mastodon-target?))
|
(def mastodon-config? (s/merge mastodon-auth? mastodon-target?))
|
||||||
|
|
||||||
|
|
||||||
(defn-spec content-filter-regexes ::content-filters
|
|
||||||
[mastodon-config mastodon-config?]
|
|
||||||
(mapv re-pattern (:content-filters mastodon-config)))
|
|
||||||
|
|
||||||
(defn-spec keyword-filter-regexes ::keyword-filters
|
|
||||||
[mastodon-config mastodon-config?]
|
|
||||||
(mapv re-pattern (:keyword-filters mastodon-config)))
|
|
||||||
|
|
||||||
(defn-spec max-post-length ::max-post-length
|
(defn-spec max-post-length ::max-post-length
|
||||||
[target mastodon-target?]
|
[target mastodon-target?]
|
||||||
(:max-post-length target))
|
(:max-post-length target))
|
||||||
|
@ -64,14 +51,6 @@
|
||||||
mastodon.)
|
mastodon.)
|
||||||
(infra/exit-with-error "missing Mastodon auth configuration!")))
|
(infra/exit-with-error "missing Mastodon auth configuration!")))
|
||||||
|
|
||||||
(defn-spec blocked-content? boolean?
|
|
||||||
[mastodon-config mastodon-config?
|
|
||||||
text string?]
|
|
||||||
(boolean
|
|
||||||
(or (some #(re-find % text) (content-filter-regexes mastodon-config))
|
|
||||||
(when (not-empty (keyword-filter-regexes mastodon-config))
|
|
||||||
(empty? (some #(re-find % text) (keyword-filter-regexes mastodon-config)))))))
|
|
||||||
|
|
||||||
(defn-spec delete-status any?
|
(defn-spec delete-status any?
|
||||||
[mastodon-config mastodon-config?
|
[mastodon-config mastodon-config?
|
||||||
status-id string?]
|
status-id string?]
|
||||||
|
@ -129,7 +108,6 @@
|
||||||
items any?]
|
items any?]
|
||||||
(doseq [{:keys [text media-links]}
|
(doseq [{:keys [text media-links]}
|
||||||
(->> items
|
(->> items
|
||||||
(remove #(blocked-content? mastodon-auth (:text %)))
|
|
||||||
(filter #(> (:created-at %) last-post-time)))]
|
(filter #(> (:created-at %) last-post-time)))]
|
||||||
(if media-links
|
(if media-links
|
||||||
(post-status-with-images mastodon-auth target text media-links)
|
(post-status-with-images mastodon-auth target text media-links)
|
||||||
|
|
|
@ -22,6 +22,10 @@
|
||||||
:opt-un [::media-links]))
|
:opt-un [::media-links]))
|
||||||
(s/def ::type keyword?)
|
(s/def ::type keyword?)
|
||||||
(s/def ::resolve-urls? boolean?)
|
(s/def ::resolve-urls? boolean?)
|
||||||
|
(s/def ::content-filter string?)
|
||||||
|
(s/def ::content-filters (s/* ::content-filter))
|
||||||
|
(s/def ::keyword-filter string?)
|
||||||
|
(s/def ::keyword-filters (s/* ::keyword-filter))
|
||||||
(defmulti source-type :type)
|
(defmulti source-type :type)
|
||||||
(defmethod source-type :twitter-source [_]
|
(defmethod source-type :twitter-source [_]
|
||||||
(s/merge (s/keys :req-un[::type]) twitter/twitter-source?))
|
(s/merge (s/keys :req-un[::type]) twitter/twitter-source?))
|
||||||
|
@ -32,7 +36,7 @@
|
||||||
(s/def ::target (s/multi-spec target-type ::type))
|
(s/def ::target (s/multi-spec target-type ::type))
|
||||||
|
|
||||||
(s/def ::transformation (s/keys :req-un [::source ::target]
|
(s/def ::transformation (s/keys :req-un [::source ::target]
|
||||||
:opt-un [::resolve-urls?]))
|
:opt-un [::resolve-urls? ::content-filters ::keyword-filters]))
|
||||||
(def transformations? (s/* ::transformation))
|
(def transformations? (s/* ::transformation))
|
||||||
|
|
||||||
(defn trim-text [text max-post-length]
|
(defn trim-text [text max-post-length]
|
||||||
|
@ -73,6 +77,23 @@
|
||||||
(when resolve-urls?
|
(when resolve-urls?
|
||||||
(update input :text #(string/replace % shortened-url-pattern resolve-url))))
|
(update input :text #(string/replace % shortened-url-pattern resolve-url))))
|
||||||
|
|
||||||
|
;; Compiles the :content-filters entries of a transformation (regex source
;; strings) into regex patterns. Yields an empty vector when the key is absent.
;; The return spec describes compiled patterns: the result of re-pattern is a
;; regex, not a string, so ::content-filters (a sequence of strings) would not
;; describe what this function actually returns.
(defn-spec content-filter-regexes (s/* regexp?)
  [transformation ::transformation]
  (mapv re-pattern (:content-filters transformation)))
|
||||||
|
|
||||||
|
;; Compiles the :keyword-filters entries of a transformation (regex source
;; strings) into regex patterns. Yields an empty vector when the key is absent.
;; The return spec describes compiled patterns: the result of re-pattern is a
;; regex, not a string, so ::keyword-filters (a sequence of strings) would not
;; describe what this function actually returns.
(defn-spec keyword-filter-regexes (s/* regexp?)
  [transformation ::transformation]
  (mapv re-pattern (:keyword-filters transformation)))
|
||||||
|
|
||||||
|
;; Decides whether a post's text must be dropped for the given transformation.
;; A text is blocked when
;;   - any content filter matches it, or
;;   - keyword filters are configured and none of them matches it.
;; Returns a strict boolean.
(defn-spec blocked-content? boolean?
  [transformation ::transformation
   text string?]
  (let [;; compile each filter list once per call
        content-regexes (content-filter-regexes transformation)
        keyword-regexes (keyword-filter-regexes transformation)
        ;; re-find yields nil only when there is no match; a zero-length match
        ;; yields "", which must still count as a match, so test with some?
        ;; rather than inspecting the matched text for emptiness.
        matches? (fn [regex] (some? (re-find regex text)))]
    (boolean
     (or (some matches? content-regexes)
         (and (seq keyword-regexes)
              (not-any? matches? keyword-regexes))))))
|
||||||
|
|
||||||
|
|
||||||
; TODO: move this to mastodon-api - seems to belong strongly to mastodon
|
; TODO: move this to mastodon-api - seems to belong strongly to mastodon
|
||||||
(defn-spec intermediate-to-mastodon mastodon-output?
|
(defn-spec intermediate-to-mastodon mastodon-output?
|
||||||
[mastodon-auth masto/mastodon-auth?
|
[mastodon-auth masto/mastodon-auth?
|
||||||
|
@ -100,19 +121,19 @@
|
||||||
|
|
||||||
(defn-spec post-tweets-to-mastodon any?
|
(defn-spec post-tweets-to-mastodon any?
|
||||||
[mastodon-auth masto/mastodon-auth?
|
[mastodon-auth masto/mastodon-auth?
|
||||||
resolve-urls? ::resolve-urls?
|
transformation ::transformation
|
||||||
source twitter/twitter-source?
|
|
||||||
target masto/mastodon-target?
|
|
||||||
last-post-time any?]
|
last-post-time any?]
|
||||||
(fn [error tweets response]
|
(let [{:keys [source target resolve-urls?]} transformation]
|
||||||
(if error
|
(fn [error tweets response]
|
||||||
(infra/exit-with-error error)
|
(if error
|
||||||
(->> (infra/js->edn tweets)
|
(infra/exit-with-error error)
|
||||||
(map twitter/parse-tweet)
|
(->> (infra/js->edn tweets)
|
||||||
(map #(intermediate-resolve-urls resolve-urls? %))
|
(map twitter/parse-tweet)
|
||||||
(map #(twitter/nitter-url source %))
|
(remove #(blocked-content? transformation (:text %)))
|
||||||
(map #(intermediate-to-mastodon mastodon-auth target %))
|
(map #(intermediate-resolve-urls resolve-urls? %))
|
||||||
(masto/post-items mastodon-auth target last-post-time)))))
|
(map #(twitter/nitter-url source %))
|
||||||
|
(map #(intermediate-to-mastodon mastodon-auth target %))
|
||||||
|
(masto/post-items mastodon-auth target last-post-time))))))
|
||||||
|
|
||||||
(defn-spec tweets-to-mastodon any?
|
(defn-spec tweets-to-mastodon any?
|
||||||
[mastodon-auth masto/mastodon-auth?
|
[mastodon-auth masto/mastodon-auth?
|
||||||
|
@ -127,7 +148,5 @@
|
||||||
account
|
account
|
||||||
(post-tweets-to-mastodon
|
(post-tweets-to-mastodon
|
||||||
mastodon-auth
|
mastodon-auth
|
||||||
resolve-urls?
|
transformation
|
||||||
source
|
|
||||||
target
|
|
||||||
last-post-time)))))
|
last-post-time)))))
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
:include-replies? false
|
:include-replies? false
|
||||||
:include-rts? true
|
:include-rts? true
|
||||||
:nitter-urls? true
|
:nitter-urls? true
|
||||||
:accounts ["an-twitter-account"]}
|
:accounts ["an-twitter-account"]}
|
||||||
:target {:type :mastodon-target
|
:target {:type :mastodon-target
|
||||||
:append-screen-name? true
|
:append-screen-name? true
|
||||||
:media-only? false
|
:media-only? false
|
||||||
|
@ -21,4 +21,6 @@
|
||||||
:visibility "unlisted"
|
:visibility "unlisted"
|
||||||
:sensitive? true
|
:sensitive? true
|
||||||
:signature "my-bot"}
|
:signature "my-bot"}
|
||||||
:resolve-urls? true}])))
|
:resolve-urls? true
|
||||||
|
:content-filters [".*bannedsite.*"]
|
||||||
|
:keyword-filters [".*"]}])))
|
||||||
|
|
Reference in a new issue