mv resolve-urls? to general transformation tool

This commit is contained in:
jem 2020-05-29 23:31:11 +02:00
parent 0bea240e49
commit 8719e7fd15
4 changed files with 39 additions and 35 deletions

View file

@ -84,7 +84,10 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w
;; optional limit for the post length
:max-post-length 300
;; optional signature for posts
:signature "#newsbot"}}]
:signature "#newsbot"}
;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false
:resolve-urls? true}]
}
```

View file

@ -5,7 +5,6 @@
[orchestra.core :refer-macros [defn-spec]]
[clojure.string :as string]
[mastodon-bot.infra :as infra]
["deasync" :as deasync]
["request" :as request]
["mastodon-api" :as mastodon]))
@ -18,7 +17,6 @@
(s/def ::signature string?)
(s/def ::sensitive? boolean?)
(s/def ::media-only? boolean?)
(s/def ::resolve-urls? boolean?)
(s/def ::visibility #{"direct" "private" "unlisted" "public"})
(s/def ::replacements string?)
(s/def ::max-post-length (fn [n] (and
@ -37,7 +35,6 @@
::append-screen-name?
::sensitive?
::media-only?
;::resolve-urls?
;::replacements
]))
(def mastodon-config? (s/merge mastodon-auth? mastodon-target?))
@ -80,31 +77,6 @@
status-id string?]
(.delete (mastodon-client mastodon-config) (str "statuses/" status-id) #js {}))
;; TODO: move to transform
(defn resolve-url
  "Best-effort expansion of a single URL found in a post.

  Receives a match vector and uses only its first element, the matched URL
  text. Performs a synchronous GET (`request` wrapped with `deasync`) with
  redirects disabled and returns the `location` response header with the
  `?mbid=social_twitter` suffix removed. Falls back to the original text
  when no `location` header is present or when a js/Error is thrown."
  [[uri]]
  (try
    (or
     (some-> ((deasync request)
              #js {:method "GET"
                   ;; Prepend a scheme only when none is present. Testing for
                   ;; "https://" alone turned "http://host" into
                   ;; "https://http://host", so plain-http links never resolved.
                   :uri (if (re-find #"^https?://" uri) uri (str "https://" uri))
                   :followRedirect false})
             (.-headers)
             (.-location)
             (string/replace "?mbid=social_twitter" ""))
     uri)
    ;; Deliberate best effort: any failure keeps the URL as it was posted.
    (catch js/Error _ uri)))
;; TODO: move to transform
;; Regex used to locate URL-like substrings in post text.
;; - The scheme (http:// or https://) is optional and is the only capturing group.
;; - An optional user[:password]@ part may follow the scheme.
;; - The host is either a dotted IPv4 address outside 10.x, 127.x, 169.254.x,
;;   192.168.x and 172.16-31.x, or a dotted host name whose last label has at
;;   least two characters from a-z / U+00A1-U+FFFF (no case-insensitive flag is set).
;; - An optional :port (2-5 digits) and an optional path/query/fragment follow.
;; NOTE(review): despite the name, nothing here is specific to URL shorteners;
;; every matching URL is handed to the replacement fn. Confirm that is intended.
(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?")
; TODO: move to transform
(defn-spec resolve-urls string?
  [mastodon-config mastodon-config?
   text string?]
  ;; URL expansion is opt-in: only a truthy :resolve-urls? in the config makes
  ;; each pattern match go through resolve-url; otherwise the text is returned as is.
  (if (:resolve-urls? mastodon-config)
    (string/replace text shortened-url-pattern resolve-url)
    text))
(defn post-status
([mastodon-auth target status-text]
(post-status mastodon-auth target status-text nil print))
@ -114,7 +86,6 @@
(let [{:keys [visibility sensitive?]} target]
(-> (.post (mastodon-client mastodon-auth) "statuses"
(clj->js (merge {:status (->> status-text
(resolve-urls mastodon-auth)
(perform-replacements mastodon-auth))}
(when media-ids {:media_ids media-ids})
(when sensitive? {:sensitive sensitive?})

View file

@ -7,7 +7,9 @@
[mastodon-bot.infra :as infra]
[mastodon-bot.mastodon-api :as masto]
[mastodon-bot.twitter-api :as twitter]
[mastodon-bot.tumblr-api :as tumblr]))
[mastodon-bot.tumblr-api :as tumblr]
["deasync" :as deasync]
["request" :as request]))
(s/def ::created-at any?)
(s/def ::text string?)
@ -19,6 +21,7 @@
(def mastodon-output? (s/keys :req-un [::created-at ::text]
:opt-un [::media-links]))
(s/def ::type keyword?)
(s/def ::resolve-urls? boolean?)
(defmulti source-type :type)
(defmethod source-type :twitter-source [_]
(s/merge (s/keys :req-un[::type]) twitter/twitter-source?))
@ -27,7 +30,9 @@
(defmethod target-type :mastodon-target [_]
(s/merge (s/keys :req-un [::type]) masto/mastodon-target?))
(s/def ::target (s/multi-spec target-type ::type))
(s/def ::transformation (s/keys :req-un [::source ::target]))
(s/def ::transformation (s/keys :req-un [::source ::target]
:opt-un [::resolve-urls?]))
(def transformations? (s/* ::transformation))
(defn trim-text [text max-post-length]
@ -47,6 +52,27 @@
:else text))
(defn resolve-url
  "Best-effort expansion of a single URL found in a post.

  Receives a match vector and uses only its first element, the matched URL
  text. Performs a synchronous GET (`request` wrapped with `deasync`) with
  redirects disabled and returns the `location` response header with the
  `?mbid=social_twitter` suffix removed. Falls back to the original text
  when no `location` header is present or when a js/Error is thrown."
  [[uri]]
  (try
    (or
     (some-> ((deasync request)
              #js {:method "GET"
                   ;; Prepend a scheme only when none is present. Testing for
                   ;; "https://" alone turned "http://host" into
                   ;; "https://http://host", so plain-http links never resolved.
                   :uri (if (re-find #"^https?://" uri) uri (str "https://" uri))
                   :followRedirect false})
             (.-headers)
             (.-location)
             (string/replace "?mbid=social_twitter" ""))
     uri)
    ;; Deliberate best effort: any failure keeps the URL as it was posted.
    (catch js/Error _ uri)))
;; Regex used to locate URL-like substrings in post text.
;; - The scheme (http:// or https://) is optional and is the only capturing group.
;; - An optional user[:password]@ part may follow the scheme.
;; - The host is either a dotted IPv4 address outside 10.x, 127.x, 169.254.x,
;;   192.168.x and 172.16-31.x, or a dotted host name whose last label has at
;;   least two characters from a-z / U+00A1-U+FFFF (no case-insensitive flag is set).
;; - An optional :port (2-5 digits) and an optional path/query/fragment follow.
;; NOTE(review): despite the name, nothing here is specific to URL shorteners;
;; every matching URL is handed to the replacement fn. Confirm that is intended.
(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?")
;; Expands the URLs found in the :text of an intermediate post when
;; `resolve-urls?` is truthy; otherwise hands `input` back unchanged.
;; The return spec is `input?` because the function returns the (possibly
;; updated) input map, not a string.
;; NOTE(review): assumes `input?` still holds after :text is rewritten - confirm.
(defn-spec intermediate-resolve-urls input?
  [resolve-urls? ::resolve-urls?
   input input?]
  ;; `if` with an explicit else branch: the previous `when` yielded nil for a
  ;; falsy flag, so nil instead of the post was passed on to the next
  ;; pipeline step whenever URL resolution was switched off.
  (if resolve-urls?
    (update input :text #(string/replace % shortened-url-pattern resolve-url))
    input))
; TODO: move this to mastodon-api - seems to belong strongly to mastodon
(defn-spec intermediate-to-mastodon mastodon-output?
[mastodon-auth masto/mastodon-auth?
@ -74,6 +100,7 @@
(defn-spec post-tweets-to-mastodon any?
[mastodon-auth masto/mastodon-auth?
resolve-urls? ::resolve-urls?
source twitter/twitter-source?
target masto/mastodon-target?
last-post-time any?]
@ -82,7 +109,8 @@
(infra/exit-with-error error)
(->> (infra/js->edn tweets)
(map twitter/parse-tweet)
(map twitter/nitter-url source)
(map #(intermediate-resolve-urls resolve-urls? %))
(map #(twitter/nitter-url source %))
(map #(intermediate-to-mastodon mastodon-auth target %))
(masto/post-items mastodon-auth target last-post-time)))))
@ -91,7 +119,7 @@
twitter-auth twitter/twitter-auth?
transformation ::transformation
last-post-time any?]
(let [{:keys [source target]} transformation]
(let [{:keys [source target resolve-urls?]} transformation]
(doseq [account (:accounts source)]
(twitter/user-timeline
twitter-auth
@ -99,6 +127,7 @@
account
(post-tweets-to-mastodon
mastodon-auth
resolve-urls?
source
target
last-post-time)))))

View file

@ -20,4 +20,5 @@
:max-post-length 500
:visibility "unlisted"
:sensitive? true
:signature "my-bot"}}])))
:signature "my-bot"}
:resolve-urls? true}])))