Merge pull request #43 from DomainDrivenArchitecture/refactoring

Refactoring: Introduce spec & more modulare ns
This commit is contained in:
Dmitri Sotnikov 2020-05-15 10:52:27 -04:00 committed by GitHub
commit 3f1f80e058
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 360 additions and 248 deletions

2
.gitignore vendored
View file

@ -1,3 +1,5 @@
config.edn config.edn
package-lock.json package-lock.json
/.shadow-cljs
/node_modules /node_modules
/target

View file

@ -1,243 +0,0 @@
#!/usr/bin/env lumo
(ns mastodon-bot.core
(:require
[cljs.core :refer [*command-line-args*]]
[cljs.reader :as edn]
[clojure.set :refer [rename-keys]]
[clojure.string :as string]
["deasync" :as deasync]
["request" :as request]
["fs" :as fs]
["mastodon-api" :as mastodon]
["rss-parser" :as rss]
["tumblr" :as tumblr]
["twitter" :as twitter]))
(defn exit-with-error [error]
(js/console.error error)
(js/process.exit 1))
(defn find-config []
(let [config (or (first *command-line-args*)
(-> js/process .-env .-MASTODON_BOT_CONFIG)
"config.edn")]
(if (fs/existsSync config)
config
(exit-with-error (str "failed to read config: " config)))))
(def config (-> (find-config) (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string))
(def mastodon-config (:mastodon config))
(def mastodon-client (or (some-> mastodon-config clj->js mastodon.)
(exit-with-error "missing Mastodon client configuration!")))
(def content-filter-regexes (mapv re-pattern (:content-filters mastodon-config)))
(def keyword-filter-regexes (mapv re-pattern (:keyword-filters mastodon-config)))
(def append-screen-name? (boolean (:append-screen-name? mastodon-config)))
(def max-post-length (:max-post-length mastodon-config))
(defn blocked-content? [text]
(boolean
(or (some #(re-find % text) content-filter-regexes)
(when (not-empty keyword-filter-regexes)
(empty? (some #(re-find % text) keyword-filter-regexes))))))
(defn js->edn [data]
(js->clj data :keywordize-keys true))
(defn trim-text [text]
(cond
(nil? max-post-length)
text
(> (count text) max-post-length)
(reduce
(fn [text word]
(if (> (+ (count text) (count word)) (- max-post-length 3))
(reduced (str text "..."))
(str text " " word)))
""
(string/split text #" "))
:else text))
(defn delete-status [status]
(.delete mastodon-client (str "statuses/" status) #js {}))
(defn resolve-url [[uri]]
(try
(or
(some-> ((deasync request)
#js {:method "GET"
:uri (if (string/starts-with? uri "https://") uri (str "https://" uri))
:followRedirect false})
(.-headers)
(.-location)
(string/replace "?mbid=social_twitter" ""))
uri)
(catch js/Error _ uri)))
(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?")
(defn resolve-urls [text]
(cond-> text
(:resolve-urls? mastodon-config)
(string/replace shortened-url-pattern resolve-url)
(:nitter-urls? mastodon-config)
(string/replace #"https://twitter.com" "https://nitter.net")))
(defn perform-replacements [text]
(reduce-kv string/replace text (:replacements mastodon-config)))
(defn set-signature [text]
(if-let [signature (:signature mastodon-config )]
(str text "\n" signature)
text))
(defn post-status
([status-text]
(post-status status-text nil))
([status-text media-ids]
(let [{:keys [sensitive? signature visibility]} mastodon-config]
(.post mastodon-client "statuses"
(clj->js (merge {:status (-> status-text resolve-urls perform-replacements set-signature)}
(when media-ids {:media_ids media-ids})
(when sensitive? {:sensitive sensitive?})
(when visibility {:visibility visibility})))))))
(defn post-image [image-stream description callback]
(-> (.post mastodon-client "media" #js {:file image-stream :description description})
(.then #(-> % .-data .-id callback))))
(defn post-status-with-images
([status-text urls]
(post-status-with-images status-text urls []))
([status-text [url & urls] ids]
(if url
(-> request
(.get url)
(.on "response"
(fn [image-stream]
(post-image image-stream status-text #(post-status-with-images status-text urls (conj ids %))))))
(post-status status-text (not-empty ids)))))
(defn get-mastodon-timeline [callback]
(.then (.get mastodon-client (str "accounts/" (:account-id mastodon-config)"/statuses") #js {})
#(let [response (-> % .-data js->edn)]
(if-let [error (:error response)]
(exit-with-error error)
(callback response)))))
(defn post-items [last-post-time items]
(doseq [{:keys [text media-links]} (->> items
(remove #(blocked-content? (:text %)))
(filter #(> (:created-at %) last-post-time)))]
(if media-links
(post-status-with-images text media-links)
(when-not (:media-only? mastodon-config)
(post-status text)))))
(defn in [needle haystack]
(some (partial = needle) haystack))
; If the text ends in a link to the media (which is uploaded anyway),
; chop it off instead of including the link in the toot
(defn chop-tail-media-url [text media]
(string/replace text #" (\S+)$" #(if (in (%1 1) (map :url media)) "" (%1 0))))
(defn parse-tweet [{created-at :created_at
text :full_text
{:keys [media]} :extended_entities
{:keys [screen_name]} :user :as tweet}]
{:created-at (js/Date. created-at)
:text (trim-text (str (chop-tail-media-url text media) (if append-screen-name? (str "\n - " screen_name) "")))
:media-links (keep #(when (= (:type %) "photo") (:media_url_https %)) media)})
(defmulti parse-tumblr-post :type)
(defmethod parse-tumblr-post "text" [{:keys [body date short_url]}]
{:created-at (js/Date. date)
:text (str (trim-text body) "\n\n" short_url)})
(defmethod parse-tumblr-post "photo" [{:keys [caption date photos short_url] :as post}]
{:created-at (js/Date. date)
:text (string/join "\n" [(string/replace caption #"<[^>]*>" "") short_url])
:media-links (mapv #(-> % :original_size :url) photos)})
(defmethod parse-tumblr-post :default [post])
(defn post-tumblrs [last-post-time]
(fn [err response]
(->> response
js->edn
:posts
(mapv parse-tumblr-post)
(post-items last-post-time))))
(defn post-tweets [last-post-time]
(fn [error tweets response]
(if error
(exit-with-error error)
(->> (js->edn tweets)
(map parse-tweet)
(post-items last-post-time)))))
(defn strip-utm [news-link]
(first (string/split news-link #"\?utm")))
(defn parse-feed [last-post-time parser [title url]]
(-> (.parseURL parser url)
(.then #(post-items
last-post-time
(for [{:keys [title isoDate pubDate content link]} (-> % js->edn :items)]
{:created-at (js/Date. (or isoDate pubDate))
:text (str (trim-text title) "\n\n" (strip-utm link))})))))
(defn twitter-client [access-keys]
(try
(twitter. (clj->js access-keys))
(catch js/Error e
(exit-with-error
(str "failed to connect to Twitter: " (.-message e))))))
(defn tumblr-client [access-keys account]
(try
(tumblr/Blog. account (clj->js access-keys))
(catch js/Error e
(exit-with-error
(str "failed to connect to Tumblr account " account ": " (.-message e))))))
(defn -main []
(get-mastodon-timeline
(fn [timeline]
(let [last-post-time (-> timeline first :created_at (js/Date.))]
;;post from Twitter
(when-let [twitter-config (:twitter config)]
(let [{:keys [access-keys accounts include-replies? include-rts?]} twitter-config
client (twitter-client access-keys)]
(doseq [account accounts]
(.get client
"statuses/user_timeline"
#js {:screen_name account
:tweet_mode "extended"
:include_rts (boolean include-rts?)
:exclude_replies (not (boolean include-replies?))}
(post-tweets last-post-time)))))
;;post from Tumblr
(when-let [{:keys [access-keys accounts limit]} (:tumblr config)]
(doseq [account accounts]
(let [client (tumblr-client access-keys account)]
(.posts client #js {:limit (or limit 5)} (post-tumblrs last-post-time)))))
;;post from RSS
(when-let [feeds (some-> config :rss)]
(let [parser (rss.)]
(doseq [feed feeds]
(parse-feed last-post-time parser feed))))))))
(set! *main-cli-fn* -main)

View file

@ -1,6 +1,12 @@
{:source-paths ["mastodon_bot"] {:source-paths ["src/main"
:dependencies [] "src/test"]
:builds {:app {:target :node-script :dependencies [[orchestra "2018.12.06-2"]]
:builds {:dev {:target :node-library
:output-to "target/lib-mastodon-bot.js"
:exports {:infra mastodon-bot.infra/js->edn}
:repl-pprint true
}
:app {:target :node-script
:output-to "target/mastodon-bot.js" :output-to "target/mastodon-bot.js"
:main mastodon-bot.core/main :main mastodon-bot.core/main
:compiler-options {:optimizations :simple}}}} :compiler-options {:optimizations :simple}}}}

154
src/main/mastodon_bot/core.cljs Executable file
View file

@ -0,0 +1,154 @@
#!/usr/bin/env lumo
(ns mastodon-bot.core
(:require
[clojure.spec.alpha :as s]
[clojure.spec.test.alpha :as st]
[orchestra.core :refer-macros [defn-spec]]
[cljs.core :refer [*command-line-args*]]
[clojure.string :as string]
["rss-parser" :as rss]
["tumblr" :as tumblr]
["twitter" :as twitter]
[mastodon-bot.infra :as infra]
[mastodon-bot.mastodon-api :as masto]))
(s/def ::mastodon-config masto/mastodon-config?)
(s/def ::twitter map?)
(s/def ::tumblr map?)
(s/def ::rss map?)
(def config? (s/keys :req-un [::mastodon-config]
:opt-un [::twitter ::tumblr ::rss]))
(defn-spec mastodon-config ::mastodon-config
[config config?]
(:mastodon-config config))
(def config (infra/load-config))
(defn trim-text [text]
(let [max-post-length (masto/max-post-length (mastodon-config config))]
(cond
(nil? max-post-length)
text
(> (count text) max-post-length)
(reduce
(fn [text word]
(if (> (+ (count text) (count word)) (- max-post-length 3))
(reduced (str text "..."))
(str text " " word)))
""
(string/split text #" "))
:else text)))
(defn in [needle haystack]
(some (partial = needle) haystack))
; If the text ends in a link to the media (which is uploaded anyway),
; chop it off instead of including the link in the toot
(defn chop-tail-media-url [text media]
(string/replace text #" (\S+)$" #(if (in (%1 1) (map :url media)) "" (%1 0))))
(defn parse-tweet [{created-at :created_at
text :full_text
{:keys [media]} :extended_entities
{:keys [screen_name]} :user :as tweet}]
{:created-at (js/Date. created-at)
:text (trim-text (str (chop-tail-media-url text media)
(if (masto/append-screen-name? (mastodon-config config))
(str "\n - " screen_name) "")))
:media-links (keep #(when (= (:type %) "photo") (:media_url_https %)) media)})
(defmulti parse-tumblr-post :type)
(defmethod parse-tumblr-post "text" [{:keys [body date short_url]}]
{:created-at (js/Date. date)
:text (str (trim-text body) "\n\n" short_url)})
(defmethod parse-tumblr-post "photo" [{:keys [caption date photos short_url] :as post}]
{:created-at (js/Date. date)
:text (string/join "\n" [(string/replace caption #"<[^>]*>" "") short_url])
:media-links (mapv #(-> % :original_size :url) photos)})
(defmethod parse-tumblr-post :default [post])
(defn post-tumblrs [last-post-time]
(fn [err response]
(->> response
infra/js->edn
:posts
(mapv parse-tumblr-post)
(masto/post-items
(mastodon-config config)
last-post-time))))
(defn post-tweets [last-post-time]
(fn [error tweets response]
(if error
(infra/exit-with-error error)
(->> (infra/js->edn tweets)
(map parse-tweet)
(masto/post-items
(mastodon-config config)
last-post-time)))))
(defn strip-utm [news-link]
(first (string/split news-link #"\?utm")))
(defn parse-feed [last-post-time parser [title url]]
(-> (.parseURL parser url)
(.then #(masto/post-items
(mastodon-config config)
last-post-time
(for [{:keys [title isoDate pubDate content link]} (-> % infra/js->edn :items)]
{:created-at (js/Date. (or isoDate pubDate))
:text (str (trim-text title) "\n\n" (strip-utm link))})))))
(defn twitter-client [access-keys]
(try
(twitter. (clj->js access-keys))
(catch js/Error e
(infra/exit-with-error
(str "failed to connect to Twitter: " (.-message e))))))
(defn tumblr-client [access-keys account]
(try
(tumblr/Blog. account (clj->js access-keys))
(catch js/Error e
(infra/exit-with-error
(str "failed to connect to Tumblr account " account ": " (.-message e))))))
(defn -main []
(masto/get-mastodon-timeline
(mastodon-config config)
(fn [timeline]
(let [last-post-time (-> timeline first :created_at (js/Date.))]
;;post from Twitter
(when-let [twitter-config (:twitter config)]
(let [{:keys [access-keys accounts include-replies? include-rts?]} twitter-config
client (twitter-client access-keys)]
(doseq [account accounts]
(.get client
"statuses/user_timeline"
#js {:screen_name account
:tweet_mode "extended"
:include_rts (boolean include-rts?)
:exclude_replies (not (boolean include-replies?))}
(post-tweets last-post-time)))))
;;post from Tumblr
(when-let [{:keys [access-keys accounts limit]} (:tumblr config)]
(doseq [account accounts]
(let [client (tumblr-client access-keys account)]
(.posts client #js {:limit (or limit 5)} (post-tumblrs last-post-time)))))
;;post from RSS
(when-let [feeds (some-> config :rss)]
(let [parser (rss.)]
(doseq [feed feeds]
(parse-feed last-post-time parser feed))))))))
(set! *main-cli-fn* -main)
(st/instrument 'mastodon-config)

View file

@ -0,0 +1,22 @@
(ns mastodon-bot.infra
(:require
[cljs.reader :as edn]
["fs" :as fs]))
(defn js->edn [data]
(js->clj data :keywordize-keys true))
(defn exit-with-error [error]
(js/console.error error)
(js/process.exit 1))
(defn find-config []
(let [config (or (first *command-line-args*)
(-> js/process .-env .-MASTODON_BOT_CONFIG)
"config.edn")]
(if (fs/existsSync config)
config
(exit-with-error (str "failed to read config: " config)))))
(defn load-config []
(-> (find-config) (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string))

View file

@ -0,0 +1,171 @@
(ns mastodon-bot.mastodon-api
(:require
[clojure.spec.alpha :as s]
[clojure.spec.test.alpha :as st]
[orchestra.core :refer-macros [defn-spec]]
[clojure.string :as string]
[mastodon-bot.infra :as infra]
["deasync" :as deasync]
["request" :as request]
["mastodon-api" :as mastodon]))
; Todo: think about how namespaced keywords & clj->js can play nicely together
(s/def ::access_token string?)
(s/def ::api_url string?)
(s/def ::account-id string?)
(s/def ::content-filter string?)
(s/def ::keyword-filter string?)
(s/def ::append-screen-name? boolean?)
(s/def ::signature string?)
(s/def ::sensitive? boolean?)
(s/def ::resolve-urls? boolean?)
(s/def ::nitter-urls? boolean?)
(s/def ::visibility string?)
(s/def ::replacements string?)
(s/def ::max-post-length (fn [n] (and
(int? n)
(<= n 500)
(> n 0))))
(s/def ::content-filters (s/* ::content-filter))
(s/def ::keyword-filters (s/* ::keyword-filter))
(s/def ::mastodon-js-config (s/keys :req-un [::access_token ::api_url]))
(s/def ::mastodon-clj-config (s/keys :req-un [::account-id ::content-filters ::keyword-filters
::max-post-length ::signature ::visibility
::append-screen-name? ::sensitive? ::resolve-urls?
::nitter-urls? ::replacements]))
(def mastodon-config? (s/merge ::mastodon-js-config ::mastodon-clj-config))
(defn-spec content-filter-regexes ::content-filters
[mastodon-config mastodon-config?]
(mapv re-pattern (:content-filters mastodon-config)))
(defn-spec keyword-filter-regexes ::keyword-filters
[mastodon-config mastodon-config?]
(mapv re-pattern (:keyword-filters mastodon-config)))
(defn-spec append-screen-name? ::append-screen-name?
[mastodon-config mastodon-config?]
(boolean (:append-screen-name? mastodon-config)))
(defn-spec max-post-length ::max-post-length
[mastodon-config mastodon-config?]
(:max-post-length mastodon-config))
(defn-spec perform-replacements string?
[mastodon-config mastodon-config?
text string?]
(reduce-kv string/replace text (:replacements mastodon-config)))
(defn-spec mastodon-client any?
[mastodon-config mastodon-config?]
(or (some-> mastodon-config clj->js mastodon.)
(infra/exit-with-error "missing Mastodon client configuration!")))
(defn-spec blocked-content? boolean?
[mastodon-config mastodon-config?
text string?]
(boolean
(or (some #(re-find % text) (content-filter-regexes mastodon-config))
(when (not-empty (keyword-filter-regexes mastodon-config))
(empty? (some #(re-find % text) (keyword-filter-regexes mastodon-config)))))))
(defn-spec delete-status any?
[mastodon-config mastodon-config?
status-id string?]
(.delete (mastodon-client mastodon-config) (str "statuses/" status-id) #js {}))
(defn resolve-url [[uri]]
(try
(or
(some-> ((deasync request)
#js {:method "GET"
:uri (if (string/starts-with? uri "https://") uri (str "https://" uri))
:followRedirect false})
(.-headers)
(.-location)
(string/replace "?mbid=social_twitter" ""))
uri)
(catch js/Error _ uri)))
(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?")
(defn-spec resolve-urls string?
[mastodon-config mastodon-config?
text string?]
(cond-> text
(:resolve-urls? mastodon-config)
(string/replace shortened-url-pattern resolve-url)
(:nitter-urls? mastodon-config)
(string/replace #"https://twitter.com" "https://nitter.net")))
(defn-spec set-signature string?
[mastodon-config mastodon-config?
text string?]
(if-let [signature (:signature mastodon-config )]
(str text "\n" signature)
text))
(defn post-status
([mastodon-config status-text]
(post-status mastodon-config status-text nil print))
([mastodon-config status-text media-ids]
(post-status mastodon-config status-text media-ids print))
([mastodon-config status-text media-ids callback]
(let [{:keys [sensitive? signature visibility]} mastodon-config]
(-> (.post (mastodon-client mastodon-config) "statuses"
(clj->js (merge {:status (->> status-text
(resolve-urls mastodon-config)
(perform-replacements mastodon-config)
(set-signature mastodon-config))}
(when media-ids {:media_ids media-ids})
(when sensitive? {:sensitive sensitive?})
(when visibility {:visibility visibility}))))
(.then #(-> % callback))))))
(defn-spec post-image any?
[mastodon-config mastodon-config?
image-stream any?
description string?
callback fn?]
(-> (.post (mastodon-client mastodon-config) "media"
#js {:file image-stream :description description})
(.then #(-> % .-data .-id callback))))
(defn post-status-with-images
([mastodon-config status-text urls]
(post-status-with-images mastodon-config status-text urls [] print))
([mastodon-config status-text urls ids]
(post-status-with-images mastodon-config status-text urls ids print))
([mastodon-config status-text [url & urls] ids callback]
(if url
(-> request
(.get url)
(.on "response"
(fn [image-stream]
(post-image mastodon-config image-stream status-text
#(post-status-with-images mastodon-config status-text urls (conj ids %) callback)))))
(post-status mastodon-config status-text (not-empty ids) callback))))
(defn-spec get-mastodon-timeline any?
[mastodon-config mastodon-config?
callback fn?]
(.then (.get (mastodon-client mastodon-config)
(str "accounts/" (:account-id mastodon-config)"/statuses") #js {})
#(let [response (-> % .-data infra/js->edn)]
(if-let [error (::error response)]
(infra/exit-with-error error)
(callback response)))))
(defn-spec post-items any?
[mastodon-config mastodon-config?
last-post-time any?
items any?]
(doseq [{:keys [text media-links]}
(->> items
(remove #((blocked-content? mastodon-config (:text %))))
(filter #(> (:created-at %) last-post-time)))]
(if media-links
(post-status-with-images mastodon-config text media-links)
(when-not (::media-only? mastodon-config)
(post-status mastodon-config text)))))