diff --git a/.gitignore b/.gitignore index 55ab5df..a87dbc4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ config.edn package-lock.json +/.shadow-cljs /node_modules +/target diff --git a/README.md b/README.md index c5a1d9f..10dea30 100644 --- a/README.md +++ b/README.md @@ -80,8 +80,8 @@ with later timestamps to avoid duplicate posts. On the first run the timestamp w ;; optional keyword filter regexes ;; any posts not matching the regexes will be filtered out :keyword-filters [".*clojure.*"] - ;; Replace Twitter links by Nitter - :nitter-urls? false}} + ;; Replace Twitter links by Nitter + :nitter-urls? false}} ``` * the bot looks for `config.edn` at its relative path by default, an alternative location can be specified either using the `MASTODON_BOT_CONFIG` environment variable or passing the path to config as an argument diff --git a/mastodon_bot/core.cljs b/mastodon_bot/core.cljs deleted file mode 100755 index ba0d749..0000000 --- a/mastodon_bot/core.cljs +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env lumo - -(ns mastodon-bot.core - (:require - [cljs.core :refer [*command-line-args*]] - [cljs.reader :as edn] - [clojure.set :refer [rename-keys]] - [clojure.string :as string] - ["deasync" :as deasync] - ["request" :as request] - ["fs" :as fs] - ["mastodon-api" :as mastodon] - ["rss-parser" :as rss] - ["tumblr" :as tumblr] - ["twitter" :as twitter])) - -(defn exit-with-error [error] - (js/console.error error) - (js/process.exit 1)) - -(defn find-config [] - (let [config (or (first *command-line-args*) - (-> js/process .-env .-MASTODON_BOT_CONFIG) - "config.edn")] - (if (fs/existsSync config) - config - (exit-with-error (str "failed to read config: " config))))) - -(def config (-> (find-config) (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string)) - -(def mastodon-config (:mastodon config)) - -(def mastodon-client (or (some-> mastodon-config clj->js mastodon.) - (exit-with-error "missing Mastodon client configuration!"))) - -(def content-filter-regexes (mapv re-pattern (:content-filters mastodon-config))) - -(def keyword-filter-regexes (mapv re-pattern (:keyword-filters mastodon-config))) - -(def append-screen-name? (boolean (:append-screen-name? mastodon-config))) - -(def max-post-length (:max-post-length mastodon-config)) - -(defn blocked-content? [text] - (boolean - (or (some #(re-find % text) content-filter-regexes) - (when (not-empty keyword-filter-regexes) - (empty? (some #(re-find % text) keyword-filter-regexes)))))) - -(defn js->edn [data] - (js->clj data :keywordize-keys true)) - -(defn trim-text [text] - (cond - - (nil? max-post-length) - text - - (> (count text) max-post-length) - (reduce - (fn [text word] - (if (> (+ (count text) (count word)) (- max-post-length 3)) - (reduced (str text "...")) - (str text " " word))) - "" - (string/split text #" ")) - - :else text)) - -(defn delete-status [status] - (.delete mastodon-client (str "statuses/" status) #js {})) - -(defn resolve-url [[uri]] - (try - (or - (some-> ((deasync request) - #js {:method "GET" - :uri (if (string/starts-with? uri "https://") uri (str "https://" uri)) - :followRedirect false}) - (.-headers) - (.-location) - (string/replace "?mbid=social_twitter" "")) - uri) - (catch js/Error _ uri))) - -(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?") - -(defn resolve-urls [text] - (cond-> text - (:resolve-urls? mastodon-config) - (string/replace shortened-url-pattern resolve-url) - (:nitter-urls? mastodon-config) - (string/replace #"https://twitter.com" "https://nitter.net"))) - -(defn perform-replacements [text] - (reduce-kv string/replace text (:replacements mastodon-config))) - -(defn set-signature [text] - (if-let [signature (:signature mastodon-config )] - (str text "\n" signature) - text)) - -(defn post-status - ([status-text] - (post-status status-text nil)) - ([status-text media-ids] - (let [{:keys [sensitive? signature visibility]} mastodon-config] - (.post mastodon-client "statuses" - (clj->js (merge {:status (-> status-text resolve-urls perform-replacements set-signature)} - (when media-ids {:media_ids media-ids}) - (when sensitive? {:sensitive sensitive?}) - (when visibility {:visibility visibility}))))))) - -(defn post-image [image-stream description callback] - (-> (.post mastodon-client "media" #js {:file image-stream :description description}) - (.then #(-> % .-data .-id callback)))) - -(defn post-status-with-images - ([status-text urls] - (post-status-with-images status-text urls [])) - ([status-text [url & urls] ids] - (if url - (-> request - (.get url) - (.on "response" - (fn [image-stream] - (post-image image-stream status-text #(post-status-with-images status-text urls (conj ids %)))))) - (post-status status-text (not-empty ids))))) - -(defn get-mastodon-timeline [callback] - (.then (.get mastodon-client (str "accounts/" (:account-id mastodon-config)"/statuses") #js {}) - #(let [response (-> % .-data js->edn)] - (if-let [error (:error response)] - (exit-with-error error) - (callback response))))) - -(defn post-items [last-post-time items] - (doseq [{:keys [text media-links]} (->> items - (remove #(blocked-content? (:text %))) - (filter #(> (:created-at %) last-post-time)))] - (if media-links - (post-status-with-images text media-links) - (when-not (:media-only? mastodon-config) - (post-status text))))) - -(defn in [needle haystack] - (some (partial = needle) haystack)) - -; If the text ends in a link to the media (which is uploaded anyway), -; chop it off instead of including the link in the toot -(defn chop-tail-media-url [text media] - (string/replace text #" (\S+)$" #(if (in (%1 1) (map :url media)) "" (%1 0)))) - -(defn parse-tweet [{created-at :created_at - text :full_text - {:keys [media]} :extended_entities - {:keys [screen_name]} :user :as tweet}] - {:created-at (js/Date. created-at) - :text (trim-text (str (chop-tail-media-url text media) (if append-screen-name? (str "\n - " screen_name) ""))) - :media-links (keep #(when (= (:type %) "photo") (:media_url_https %)) media)}) - -(defmulti parse-tumblr-post :type) - -(defmethod parse-tumblr-post "text" [{:keys [body date short_url]}] - {:created-at (js/Date. date) - :text (str (trim-text body) "\n\n" short_url)}) - -(defmethod parse-tumblr-post "photo" [{:keys [caption date photos short_url] :as post}] - {:created-at (js/Date. date) - :text (string/join "\n" [(string/replace caption #"<[^>]*>" "") short_url]) - :media-links (mapv #(-> % :original_size :url) photos)}) - -(defmethod parse-tumblr-post :default [post]) - -(defn post-tumblrs [last-post-time] - (fn [err response] - (->> response - js->edn - :posts - (mapv parse-tumblr-post) - (post-items last-post-time)))) - -(defn post-tweets [last-post-time] - (fn [error tweets response] - (if error - (exit-with-error error) - (->> (js->edn tweets) - (map parse-tweet) - (post-items last-post-time))))) - -(defn strip-utm [news-link] - (first (string/split news-link #"\?utm"))) - -(defn parse-feed [last-post-time parser [title url]] - (-> (.parseURL parser url) - (.then #(post-items - last-post-time - (for [{:keys [title isoDate pubDate content link]} (-> % js->edn :items)] - {:created-at (js/Date. (or isoDate pubDate)) - :text (str (trim-text title) "\n\n" (strip-utm link))}))))) - -(defn twitter-client [access-keys] - (try - (twitter. (clj->js access-keys)) - (catch js/Error e - (exit-with-error - (str "failed to connect to Twitter: " (.-message e)))))) - -(defn tumblr-client [access-keys account] - (try - (tumblr/Blog. account (clj->js access-keys)) - (catch js/Error e - (exit-with-error - (str "failed to connect to Tumblr account " account ": " (.-message e)))))) - -(defn -main [] - (get-mastodon-timeline - (fn [timeline] - (let [last-post-time (-> timeline first :created_at (js/Date.))] - ;;post from Twitter - (when-let [twitter-config (:twitter config)] - (let [{:keys [access-keys accounts include-replies? include-rts?]} twitter-config - client (twitter-client access-keys)] - (doseq [account accounts] - (.get client - "statuses/user_timeline" - #js {:screen_name account - :tweet_mode "extended" - :include_rts (boolean include-rts?) - :exclude_replies (not (boolean include-replies?))} - (post-tweets last-post-time))))) - ;;post from Tumblr - (when-let [{:keys [access-keys accounts limit]} (:tumblr config)] - (doseq [account accounts] - (let [client (tumblr-client access-keys account)] - (.posts client #js {:limit (or limit 5)} (post-tumblrs last-post-time))))) - ;;post from RSS - (when-let [feeds (some-> config :rss)] - (let [parser (rss.)] - (doseq [feed feeds] - (parse-feed last-post-time parser feed)))))))) - -(set! *main-cli-fn* -main) diff --git a/shadow-cljs.edn b/shadow-cljs.edn index d666e71..337ec72 100644 --- a/shadow-cljs.edn +++ b/shadow-cljs.edn @@ -1,6 +1,12 @@ -{:source-paths ["mastodon_bot"] - :dependencies [] - :builds {:app {:target :node-script +{:source-paths ["src/main" + "src/test"] + :dependencies [[orchestra "2018.12.06-2"]] + :builds {:dev {:target :node-library + :output-to "target/lib-mastodon-bot.js" + :exports {:infra mastodon-bot.infra/js->edn} + :repl-pprint true + } + :app {:target :node-script :output-to "target/mastodon-bot.js" :main mastodon-bot.core/main :compiler-options {:optimizations :simple}}}} diff --git a/src/main/mastodon_bot/core.cljs b/src/main/mastodon_bot/core.cljs new file mode 100755 index 0000000..3a78cbc --- /dev/null +++ b/src/main/mastodon_bot/core.cljs @@ -0,0 +1,154 @@ +#!/usr/bin/env lumo + +(ns mastodon-bot.core + (:require + [clojure.spec.alpha :as s] + [clojure.spec.test.alpha :as st] + [orchestra.core :refer-macros [defn-spec]] + [cljs.core :refer [*command-line-args*]] + [clojure.string :as string] + ["rss-parser" :as rss] + ["tumblr" :as tumblr] + ["twitter" :as twitter] + [mastodon-bot.infra :as infra] + [mastodon-bot.mastodon-api :as masto])) + +(s/def ::mastodon-config masto/mastodon-config?) +(s/def ::twitter map?) +(s/def ::tumblr map?) +(s/def ::rss map?) + +(def config? (s/keys :req-un [::mastodon-config] + :opt-un [::twitter ::tumblr ::rss])) + +(defn-spec mastodon-config ::mastodon-config + [config config?] + (:mastodon-config config)) + +(def config (infra/load-config)) + +(defn trim-text [text] + (let [max-post-length (masto/max-post-length (mastodon-config config))] + (cond + + (nil? max-post-length) + text + + (> (count text) max-post-length) + (reduce + (fn [text word] + (if (> (+ (count text) (count word)) (- max-post-length 3)) + (reduced (str text "...")) + (str text " " word))) + "" + (string/split text #" ")) + + :else text))) + +(defn in [needle haystack] + (some (partial = needle) haystack)) + +; If the text ends in a link to the media (which is uploaded anyway), +; chop it off instead of including the link in the toot +(defn chop-tail-media-url [text media] + (string/replace text #" (\S+)$" #(if (in (%1 1) (map :url media)) "" (%1 0)))) + +(defn parse-tweet [{created-at :created_at + text :full_text + {:keys [media]} :extended_entities + {:keys [screen_name]} :user :as tweet}] + {:created-at (js/Date. created-at) + :text (trim-text (str (chop-tail-media-url text media) + (if (masto/append-screen-name? (mastodon-config config)) + (str "\n - " screen_name) ""))) + :media-links (keep #(when (= (:type %) "photo") (:media_url_https %)) media)}) + +(defmulti parse-tumblr-post :type) + +(defmethod parse-tumblr-post "text" [{:keys [body date short_url]}] + {:created-at (js/Date. date) + :text (str (trim-text body) "\n\n" short_url)}) + +(defmethod parse-tumblr-post "photo" [{:keys [caption date photos short_url] :as post}] + {:created-at (js/Date. date) + :text (string/join "\n" [(string/replace caption #"<[^>]*>" "") short_url]) + :media-links (mapv #(-> % :original_size :url) photos)}) + +(defmethod parse-tumblr-post :default [post]) + +(defn post-tumblrs [last-post-time] + (fn [err response] + (->> response + infra/js->edn + :posts + (mapv parse-tumblr-post) + (masto/post-items + (mastodon-config config) + last-post-time)))) + +(defn post-tweets [last-post-time] + (fn [error tweets response] + (if error + (infra/exit-with-error error) + (->> (infra/js->edn tweets) + (map parse-tweet) + (masto/post-items + (mastodon-config config) + last-post-time))))) + +(defn strip-utm [news-link] + (first (string/split news-link #"\?utm"))) + +(defn parse-feed [last-post-time parser [title url]] + (-> (.parseURL parser url) + (.then #(masto/post-items + (mastodon-config config) + last-post-time + (for [{:keys [title isoDate pubDate content link]} (-> % infra/js->edn :items)] + {:created-at (js/Date. (or isoDate pubDate)) + :text (str (trim-text title) "\n\n" (strip-utm link))}))))) + +(defn twitter-client [access-keys] + (try + (twitter. (clj->js access-keys)) + (catch js/Error e + (infra/exit-with-error + (str "failed to connect to Twitter: " (.-message e)))))) + +(defn tumblr-client [access-keys account] + (try + (tumblr/Blog. account (clj->js access-keys)) + (catch js/Error e + (infra/exit-with-error + (str "failed to connect to Tumblr account " account ": " (.-message e)))))) + +(defn -main [] + (masto/get-mastodon-timeline + (mastodon-config config) + (fn [timeline] + (let [last-post-time (-> timeline first :created_at (js/Date.))] + ;;post from Twitter + (when-let [twitter-config (:twitter config)] + (let [{:keys [access-keys accounts include-replies? include-rts?]} twitter-config + client (twitter-client access-keys)] + (doseq [account accounts] + (.get client + "statuses/user_timeline" + #js {:screen_name account + :tweet_mode "extended" + :include_rts (boolean include-rts?) + :exclude_replies (not (boolean include-replies?))} + (post-tweets last-post-time))))) + ;;post from Tumblr + (when-let [{:keys [access-keys accounts limit]} (:tumblr config)] + (doseq [account accounts] + (let [client (tumblr-client access-keys account)] + (.posts client #js {:limit (or limit 5)} (post-tumblrs last-post-time))))) + ;;post from RSS + (when-let [feeds (some-> config :rss)] + (let [parser (rss.)] + (doseq [feed feeds] + (parse-feed last-post-time parser feed)))))))) + +(set! *main-cli-fn* -main) +(st/instrument 'mastodon-config) diff --git a/src/main/mastodon_bot/infra.cljs b/src/main/mastodon_bot/infra.cljs new file mode 100755 index 0000000..a2f1bcf --- /dev/null +++ b/src/main/mastodon_bot/infra.cljs @@ -0,0 +1,22 @@ +(ns mastodon-bot.infra + (:require + [cljs.reader :as edn] + ["fs" :as fs])) + +(defn js->edn [data] + (js->clj data :keywordize-keys true)) + +(defn exit-with-error [error] + (js/console.error error) + (js/process.exit 1)) + +(defn find-config [] + (let [config (or (first *command-line-args*) + (-> js/process .-env .-MASTODON_BOT_CONFIG) + "config.edn")] + (if (fs/existsSync config) + config + (exit-with-error (str "failed to read config: " config))))) + +(defn load-config [] + (-> (find-config) (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string)) \ No newline at end of file diff --git a/src/main/mastodon_bot/mastodon_api.cljs b/src/main/mastodon_bot/mastodon_api.cljs new file mode 100755 index 0000000..0d05c65 --- /dev/null +++ b/src/main/mastodon_bot/mastodon_api.cljs @@ -0,0 +1,171 @@ +(ns mastodon-bot.mastodon-api + (:require + [clojure.spec.alpha :as s] + [clojure.spec.test.alpha :as st] + [orchestra.core :refer-macros [defn-spec]] + [clojure.string :as string] + [mastodon-bot.infra :as infra] + ["deasync" :as deasync] + ["request" :as request] + ["mastodon-api" :as mastodon])) + +; Todo: think about how namespaced keywords & clj->js can play nicely together +(s/def ::access_token string?) +(s/def ::api_url string?) +(s/def ::account-id string?) +(s/def ::content-filter string?) +(s/def ::keyword-filter string?) +(s/def ::append-screen-name? boolean?) +(s/def ::signature string?) +(s/def ::sensitive? boolean?) +(s/def ::resolve-urls? boolean?) +(s/def ::nitter-urls? boolean?) +(s/def ::visibility string?) +(s/def ::replacements string?) +(s/def ::max-post-length (fn [n] (and + (int? n) + (<= n 500) + (> n 0)))) + +(s/def ::content-filters (s/* ::content-filter)) +(s/def ::keyword-filters (s/* ::keyword-filter)) +(s/def ::mastodon-js-config (s/keys :req-un [::access_token ::api_url])) +(s/def ::mastodon-clj-config (s/keys :req-un [::account-id ::content-filters ::keyword-filters + ::max-post-length ::signature ::visibility + ::append-screen-name? ::sensitive? ::resolve-urls? + ::nitter-urls? ::replacements])) +(def mastodon-config? (s/merge ::mastodon-js-config ::mastodon-clj-config)) + +(defn-spec content-filter-regexes ::content-filters + [mastodon-config mastodon-config?] + (mapv re-pattern (:content-filters mastodon-config))) + +(defn-spec keyword-filter-regexes ::keyword-filters + [mastodon-config mastodon-config?] + (mapv re-pattern (:keyword-filters mastodon-config))) + +(defn-spec append-screen-name? ::append-screen-name? + [mastodon-config mastodon-config?] + (boolean (:append-screen-name? mastodon-config))) + +(defn-spec max-post-length ::max-post-length + [mastodon-config mastodon-config?] + (:max-post-length mastodon-config)) + +(defn-spec perform-replacements string? + [mastodon-config mastodon-config? + text string?] + (reduce-kv string/replace text (:replacements mastodon-config))) + +(defn-spec mastodon-client any? + [mastodon-config mastodon-config?] + (or (some-> mastodon-config clj->js mastodon.) + (infra/exit-with-error "missing Mastodon client configuration!"))) + +(defn-spec blocked-content? boolean? + [mastodon-config mastodon-config? + text string?] + (boolean + (or (some #(re-find % text) (content-filter-regexes mastodon-config)) + (when (not-empty (keyword-filter-regexes mastodon-config)) + (empty? (some #(re-find % text) (keyword-filter-regexes mastodon-config))))))) + +(defn-spec delete-status any? + [mastodon-config mastodon-config? + status-id string?] + (.delete (mastodon-client mastodon-config) (str "statuses/" status-id) #js {})) + +(defn resolve-url [[uri]] + (try + (or + (some-> ((deasync request) + #js {:method "GET" + :uri (if (string/starts-with? uri "https://") uri (str "https://" uri)) + :followRedirect false}) + (.-headers) + (.-location) + (string/replace "?mbid=social_twitter" "")) + uri) + (catch js/Error _ uri))) + +(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?") + +(defn-spec resolve-urls string? + [mastodon-config mastodon-config? + text string?] + (cond-> text + (:resolve-urls? mastodon-config) + (string/replace shortened-url-pattern resolve-url) + (:nitter-urls? mastodon-config) + (string/replace #"https://twitter.com" "https://nitter.net"))) + +(defn-spec set-signature string? + [mastodon-config mastodon-config? + text string?] + (if-let [signature (:signature mastodon-config )] + (str text "\n" signature) + text)) + +(defn post-status + ([mastodon-config status-text] + (post-status mastodon-config status-text nil print)) + ([mastodon-config status-text media-ids] + (post-status mastodon-config status-text media-ids print)) + ([mastodon-config status-text media-ids callback] + (let [{:keys [sensitive? signature visibility]} mastodon-config] + (-> (.post (mastodon-client mastodon-config) "statuses" + (clj->js (merge {:status (->> status-text + (resolve-urls mastodon-config) + (perform-replacements mastodon-config) + (set-signature mastodon-config))} + (when media-ids {:media_ids media-ids}) + (when sensitive? {:sensitive sensitive?}) + (when visibility {:visibility visibility})))) + (.then #(-> % callback)))))) + +(defn-spec post-image any? + [mastodon-config mastodon-config? + image-stream any? + description string? + callback fn?] + (-> (.post (mastodon-client mastodon-config) "media" + #js {:file image-stream :description description}) + (.then #(-> % .-data .-id callback)))) + +(defn post-status-with-images + ([mastodon-config status-text urls] + (post-status-with-images mastodon-config status-text urls [] print)) + ([mastodon-config status-text urls ids] + (post-status-with-images mastodon-config status-text urls ids print)) + ([mastodon-config status-text [url & urls] ids callback] + (if url + (-> request + (.get url) + (.on "response" + (fn [image-stream] + (post-image mastodon-config image-stream status-text + #(post-status-with-images mastodon-config status-text urls (conj ids %) callback))))) + (post-status mastodon-config status-text (not-empty ids) callback)))) + +(defn-spec get-mastodon-timeline any? + [mastodon-config mastodon-config? + callback fn?] + (.then (.get (mastodon-client mastodon-config) + (str "accounts/" (:account-id mastodon-config)"/statuses") #js {}) + #(let [response (-> % .-data infra/js->edn)] + (if-let [error (::error response)] + (infra/exit-with-error error) + (callback response))))) + +(defn-spec post-items any? + [mastodon-config mastodon-config? + last-post-time any? + items any?] + (doseq [{:keys [text media-links]} + (->> items + (remove #((blocked-content? mastodon-config (:text %)))) + (filter #(> (:created-at %) last-post-time)))] + (if media-links + (post-status-with-images mastodon-config text media-links) + (when-not (::media-only? mastodon-config) + (post-status mastodon-config text))))) diff --git a/mastodon_bot/core_test.cljs b/src/test/mastodon_bot/core_test.cljs similarity index 100% rename from mastodon_bot/core_test.cljs rename to src/test/mastodon_bot/core_test.cljs