Merge pull request #48 from DomainDrivenArchitecture/transformation

More differentiated transformation #45
This commit is contained in:
Dmitri Sotnikov 2020-06-27 10:16:51 -04:00 committed by GitHub
commit f00b84c75a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 5275 additions and 1991 deletions

31
.github/workflows/dev-build.yml vendored Normal file
View file

@ -0,0 +1,31 @@
# CI workflow: installs dependencies, runs the test build and packs a release tarball.
name: Node.js CI
on:
# Triggered by pushes to master/transformation and by pull requests targeting master.
push:
branches: [ master, transformation ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
# The job runs once for every Node.js version listed here.
node-version: [10.x, 12.x, 14.x]
steps:
- uses: actions/checkout@v2
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v1
with:
node-version: ${{ matrix.node-version }}
# Install the project dependencies and the shadow-cljs command line tool.
- run: npm install
- run: npm install -g --save-dev shadow-cljs
# Compile the `test` build, then produce the `app` release build.
- run: shadow-cljs compile test
- run: shadow-cljs release app
# Stage the release artifacts in a clean directory and pack them into a tarball.
- run: rm -rf target/npm-build
- run: mkdir target/npm-build
- run: cp mastodon-bot.js target/npm-build/
- run: cp package.json target/npm-build/
- run: cp README.md target/npm-build/
- run: tar -cz -C target/npm-build -f target/npm-build.tgz .

1
.gitignore vendored
View file

@ -1,4 +1,5 @@
config.edn
config*.edn
package-lock.json
/.shadow-cljs
/node_modules

141
README.md
View file

@ -7,15 +7,6 @@ the bot will post the timeline from the specified Twitter/Tumblr accounts and RS
1. install [Node.js](https://nodejs.org/en/)
2. run `npm install` to install Node modules
If you wish to run the script directly, you will need to have [Lumo](https://github.com/anmonteiro/lumo) available on the shell path. Lumo can be installed globally via NPM by running:
npm install -g lumo-cljs
If you get a [permission failure](https://github.com/anmonteiro/lumo/issues/206), try this:
npm install -g lumo-cljs --unsafe-perm
### usage
* create a Mastodon API key following the instructions [here](https://tinysubversions.com/notes/mastodon-bot/)
@ -23,69 +14,89 @@ If you get a [permission failure](https://github.com/anmonteiro/lumo/issues/206)
* create a Tumblr API key following the instructions [here](http://www.developerdrive.com/2014/05/how-to-get-started-with-the-tumblr-api-part-1/)
* create a file called `config.edn` with the following contents:
**NOTE**: the bot checks the specified Mastodon account to see the timestamp of the last post, and only posts content
**NOTE**: the bot checks the specified Mastodon account to see the timestamp of the last post, and only posts content
with later timestamps to avoid duplicate posts. On the first run the timestamp will default to current time.
```clojure
{;; add Twitter config to mirror Twitter accounts
:twitter {:access-keys
{:consumer_key "XXXX"
:consumer_secret "XXXX"
:access_token_key "XXXX"
:access_token_secret "XXXX"}
;; optional, defaults to false
:include-replies? false
;; optional, defaults to false
:include-rts? false
;; accounts you wish to mirror
:accounts ["arstechnica" "WIRED"]}
;; add Tumblr config to mirror Tumblr accounts
:tumblr {:access-keys
{:consumer_key "XXXX"
:consumer_secret "XXXX"
:token "XXXX"
:token_secret "XXXX"}
;; optional limit for number of posts to retrieve, default: 5
:limit 10
:accounts ["cyberpunky.tumblr.com" "scipunk.tumblr.com"]}
;; add RSS config to follow feeds
:rss {"Hacker News" "https://hnrss.org/newest"
"r/Clojure" "https://www.reddit.com/r/clojure/.rss"}
:mastodon {:access_token "XXXX"
;; account number you see when you log in and go to your profile
;; e.g: https://mastodon.social/web/accounts/294795
:account-id "XXXX"
:api_url "https://botsin.space/api/v1/"
;; optional boolean to mark content as sensitive
:sensitive? true
;; optional boolean defaults to false
;; only sources containing media will be posted when set to true
:media-only? true
;; optional visibility flag: direct, private, unlisted, public
;; defaults to public
:visibility "unlisted"
;; optional limit for the post length
:max-post-length 300
;; optional flag specifying whether the name of the account
;; will be appended in the post, defaults to false
:append-screen-name? false
;; optional signature for posts
:signature "#newsbot"
;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false
:resolve-urls? true
;; optional content filter regexes
;; any posts matching the regexes will be filtered out
:content-filters [".*bannedsite.*"]
;; optional keyword filter regexes
;; any posts not matching the regexes will be filtered out
:keyword-filters [".*clojure.*"]
;; Replace Twitter links by Nitter
:nitter-urls? false}}
{:auth {;; add Twitter config to mirror Twitter accounts
:twitter {:consumer_key "XXXX"
:consumer_secret "XXXX"
:access_token_key "XXXX"
:access_token_secret "XXXX"}
:mastodon {:access_token "XXXX"
;; account number you see when you log in and go to your profile
;; e.g: https://mastodon.social/web/accounts/294795
:account-id "XXXX"
:api_url "https://botsin.space/api/v1/"}
:tumblr {:consumer_key "XXXX"
:consumer_secret "XXXX"
:token "XXXX"
:token_secret "XXXX"}}
:transform [{:source {:source-type :twitter
;; optional, defaults to false
:include-replies? false
;; optional, defaults to false
:include-rts? false
;; Replace Twitter links by Nitter
:nitter-urls? false
;; accounts you wish to mirror
:accounts ["arstechnica" "WIRED"]}
:target {:target-type :mastodon
;; optional flag specifying whether the name of the account
;; will be appended in the post, defaults to false
:append-screen-name? false
;; optional visibility flag: direct, private, unlisted, public
;; defaults to public
:visibility "unlisted"
;; optional boolean to mark content as sensitive. Defaults to true.
:sensitive? true
;; optional boolean defaults to false
;; only sources containing media will be posted when set to true
:media-only? true
;; optional limit for the post length. Defaults to 300.
:max-post-length 300
;; optional signature for posts. Defaults to "not present".
:signature "#newsbot"}
;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false
:resolve-urls? true
;; optional content filter regexes
;; any posts matching the regexes will be filtered out
:content-filters [".*bannedsite.*"]
;; optional keyword filter regexes
;; any posts not matching the regexes will be filtered out
:keyword-filters [".*clojure.*"]
;; optional replacements
;; When the strings on the left side of this map are encountered in the source,
;; they are replaced with the string on the right side of the map:
:replacements {
"@openSUSE" "@opensuse@fosstodon.org",
"@conservancy" "@conservancy@mastodon.technology"}}
{:source {:source-type :rss
;; add RSS config to follow feeds
:feeds [["Hacker News" "https://hnrss.org/newest"]
["r/Clojure" "https://www.reddit.com/r/clojure/.rss"]]}
:target {:target-type :mastodon
...}
:resolve-urls? ...}
{:source {:source-type :tumblr
;; optional limit for number of posts to retrieve, default: 5
:limit 10
:accounts ["cyberpunky.tumblr.com" "scipunk.tumblr.com"]}
:target {:target-type :mastodon
...}
:resolve-urls? ...}
]
}
```
* the bot looks for `config.edn` at its relative path by default, an alternative location can be specified either using the `MASTODON_BOT_CONFIG` environment variable or passing the path to config as an argument
* each transformation has a source `(s/def ::source-type #{:twitter :rss :tumblr})` and a target `(s/def ::target-type #{:mastodon})`, which you can combine freely. Multiple transformations for the same source-target combination are possible. Sources and targets refer to the auth section for their credentials.
* run the bot: `npm start`
* to poll at intervals setup a cron job such as:

45
RELEASING.md Normal file
View file

@ -0,0 +1,45 @@
# Releasing
## dev release
```
shadow-cljs compile test
shadow-cljs release app
rm -rf target/npm-build
mkdir target/npm-build
cp mastodon-bot.js target/npm-build/
cp package.json target/npm-build/
cp README.md target/npm-build/
tar -cz -C target/npm-build -f target/npm-build.tgz .
npm publish ./target/npm-build.tgz --access public --tag dev0
```
## prod release (should be done from master)
```
shadow-cljs compile test
#adjust version
vi shadow-cljs.edn
git commit -am "releasing"
git tag [version]
git push && git push --tag
shadow-cljs release app
shadow-cljs release app
rm -rf target/npm-build
mkdir target/npm-build
cp mastodon-bot.js target/npm-build/
cp package.json target/npm-build/
cp README.md target/npm-build/
tar -cz -C target/npm-build -f target/npm-build.tgz .
npm publish ./target/npm-build.tgz --access public --tag [version]
# Bump version
vi shadow-cljs.edn
git commit -am "version bump" && git push
```

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,12 @@
{
"name": "mastodon-bot",
"version": "0.0.2",
"description": "Bot to publish Twitter, Tumblr or RSS posts to a Mastodon account.",
"author": "Dmitri Sotnikov",
"version": "0.1.0-dev-0",
"homepage": "https://github.com/yogthos/mastodon-bot",
"repository": "https://github.com/yogthos/mastodon-bot",
"license": "MIT",
"dependencies": {
"lumo-cljs": "^1.10.1",
"mastodon-api": "1.3.0",
"rss-parser": "3.7.1",
"tumblr": "0.4.1",
@ -15,17 +17,17 @@
"devDependencies": {
"shadow-cljs": "^2.8.37"
},
"directories": {
"lib": "."
},
"files": [
"mastodon_bot/*"
],
"main": "mastodon-bot.js",
"scripts": {
"start": "./mastodon_bot/core.cljs",
"test": "MASTODON_BOT_CONFIG=test.edn ./mastodon_bot/core_test.cljs"
"start": "node mastodon-bot.js"
},
"bin": {
"mastodon-bot": "mastodon_bot/core.cljs"
}
"keywords": [
"cljs",
"mastodon",
"twitter",
"rss",
"tumblr",
"bridge",
"bot"
]
}

View file

@ -0,0 +1,9 @@
{:items [
{:title [Blogpost] Idiomatic Clojure: Mixing Parallel Side Effects and Iteration,
:link https://www.reddit.com/r/Clojure/comments/gtz1ox/blogpost_idiomatic_clojure_mixing_parallel_side/,
:pubDate 2020-05-31T13:28:48.000Z,
:author /u/bsless,
:content <!-- SC_OFF --><div class="md"><p><a href="https://bsless.github.io/mapping-parallel-side-effects/">https://bsless.github.io/mapping-parallel-side-effects/</a></p> <p>Followup from <a href="https://bsless.github.io/side-effects/">the previous post</a> where I went over the ways to idiomatically map side effects, this post tackles the same issue in a concurrent context. This one has more code than hand-waving, and you can follow along with all the examples.</p> <p>Would appreciate your feedback. It&#39;s a rather technical post and there may be things I missed or didn&#39;t consider.</p> <p>Cheers</p> </div><!-- SC_ON --> &#32; submitted by &#32; <a href="https://www.reddit.com/user/bsless"> /u/bsless </a> <br/> <span><a href="https://www.reddit.com/r/Cloj
ure/comments/gtz1ox/blogpost_idiomatic_clojure_mixing_parallel_side/">[link]</a></span> &#32; <span><a href="https://www.reddit.com/r/Clojure/comments/gtz1ox/blogpost_idiomatic_clojure_mixing_parallel_side/">[comments]</a></span>, :contentSnippet https://bsless.github.io/mapping-parallel-side-effects/ Followup from the previous post where I went over the ways to idiomatically map side effects, this post tackles the same issue in a concurrent context. This one has more code than hand-waving, and you can follow along with all the examples. Would appreciate your feedback. It's a rather technical post and there may be things I missed or didn't consider. Cheers submitted by /u/bsless [link] [comments],
:id t3_gtz1ox,
:isoDate 2020-05-31T13:28:48.000Z} ]

View file

@ -1,12 +1,12 @@
{:source-paths ["src/main"
"src/test"]
:dependencies [[orchestra "2018.12.06-2"]]
:builds {:dev {:target :node-library
:output-to "target/lib-mastodon-bot.js"
:exports {:infra mastodon-bot.core/-main}
:repl-pprint true
}
:dependencies [[orchestra "2019.02.06-1"]
[expound "0.8.4"]]
:builds {:test {:target :node-test
:output-to "target/node-tests.js"
:autorun true
:repl-pprint true}
:app {:target :node-script
:output-to "target/mastodon-bot.js"
:output-to "mastodon-bot.js"
:main mastodon-bot.core/main
:compiler-options {:optimizations :simple}}}}

View file

@ -1,144 +1,114 @@
#!/usr/bin/env lumo
(ns mastodon-bot.core
(:require
[clojure.spec.alpha :as s]
[clojure.spec.test.alpha :as st]
[clojure.string :as cs]
[orchestra.core :refer-macros [defn-spec]]
[cljs.core :refer [*command-line-args*]]
[clojure.string :as string]
["rss-parser" :as rss]
["tumblr" :as tumblr]
[expound.alpha :as expound]
[mastodon-bot.infra :as infra]
[mastodon-bot.transform :as transform]
[mastodon-bot.mastodon-api :as masto]
[mastodon-bot.twitter-api :as twitter]))
[mastodon-bot.twitter-api :as twitter]
[mastodon-bot.tumblr-api :as tumblr]))
(s/def ::mastodon-config masto/mastodon-config?)
(s/def ::twitter twitter/twitter-config?)
(s/def ::tumblr map?)
(s/def ::rss map?)
(set! s/*explain-out* expound/printer)
(def config? (s/keys :req-un [::mastodon-config]
:opt-un [::twitter ::tumblr ::rss]))
(s/def ::mastodon masto/mastodon-auth?)
(s/def ::twitter twitter/twitter-auth?)
(s/def ::tumblr tumblr/tumblr-auth?)
(s/def ::transform transform/transformations?)
(s/def ::auth (s/keys :opt-un [::mastodon ::twitter ::tumblr]))
(def config?
(s/keys :req-un [::auth ::transform]))
(defn-spec mastodon-config ::mastodon-config
(s/def ::options (s/* #{"-h"}))
(s/def ::config-location (s/? (s/and string?
#(not (cs/starts-with? % "-")))))
(s/def ::args (s/cat :options ::options
:config-location ::config-location))
(defn-spec mastodon-auth ::mastodon
[config config?]
(:mastodon-config config))
(get-in config [:auth :mastodon]))
(defn-spec twitter-config ::twitter
(defn-spec twitter-auth ::twitter
[config config?]
(:twitter config))
(get-in config [:auth :twitter]))
(def config (infra/load-config))
;; Returns the Tumblr credentials stored under [:auth :tumblr] of the
;; configuration, or nil when none are configured.
(defn-spec tumblr-auth ::tumblr
  [config config?]
  (-> config :auth :tumblr))
(defn trim-text [text]
(let [max-post-length (masto/max-post-length (mastodon-config config))]
(cond
;; Returns the collection of transformations stored under :transform of the
;; configuration.
(defn-spec transform ::transform
  [config config?]
  (get config :transform))
(nil? max-post-length)
text
;; Runs every configured transformation.
;;
;; The Mastodon timeline is fetched first; the creation time of its first
;; status is handed to each transformation as `last-post-time`. Each
;; transformation is then dispatched on its [source-type target-type] pair.
;; Twitter and Tumblr transformations are skipped when the matching
;; credentials are missing from the :auth section.
(defn-spec transform! any?
  [config config?]
  (let [masto-auth (mastodon-auth config)
        run-all
        (fn [timeline]
          (let [last-post-time (js/Date. (:created_at (first timeline)))]
            (doseq [transformation (:transform config)]
              (let [route [(get-in transformation [:source :source-type])
                           (get-in transformation [:target :target-type])]]
                (cond
                  ;; post from Twitter
                  (= route [:twitter :mastodon])
                  (when-let [tw-auth (twitter-auth config)]
                    (transform/tweets-to-mastodon
                      masto-auth
                      tw-auth
                      transformation
                      last-post-time))

                  ;; post from RSS
                  (= route [:rss :mastodon])
                  (transform/rss-to-mastodon
                    masto-auth
                    transformation
                    last-post-time)

                  ;; post from Tumblr
                  (= route [:tumblr :mastodon])
                  (when-let [tb-auth (tumblr-auth config)]
                    (transform/tumblr-to-mastodon
                      masto-auth
                      tb-auth
                      transformation
                      last-post-time)))))))]
    (masto/get-mastodon-timeline masto-auth run-all)))
(> (count text) max-post-length)
(reduce
(fn [text word]
(if (> (+ (count text) (count word)) (- max-post-length 3))
(reduced (str text "..."))
(str text " " word)))
""
(string/split text #" "))
;; Help text printed for `-h` and appended to the error message shown for
;; invalid command line arguments. The script path matches the build output
;; (`mastodon-bot.js` in the project root), which is also what `npm start` runs.
(def usage
"usage:
node mastodon-bot.js [-h] /path/to/config.edn
or
npm start [-h] /path/to/config.edn
")
:else text)))
;; Command line entry point.
;;
;; Validates the arguments against ::args. Invalid arguments are explained and
;; terminate the process with an error; `-h` prints the usage text; otherwise
;; the configuration is loaded, validated against `config?` and handed to
;; `transform!`.
(defn main [& args]
  (let [parsed (s/conform ::args args)]
    (cond
      (s/invalid? parsed)
      (do (s/explain ::args args)
          (infra/exit-with-error (str "Bad commandline arguments\n" usage)))

      (some #{"-h"} (:options parsed))
      (print usage)

      :else
      (let [config (infra/load-config (:config-location parsed))]
        (when-not (s/valid? config? config)
          (s/explain config? config)
          (infra/exit-with-error "Bad configuration"))
        (transform! config)))))
(defn in [needle haystack]
(some (partial = needle) haystack))
; If the text ends in a link to the media (which is uploaded anyway),
; chop it off instead of including the link in the toot
(defn chop-tail-media-url [text media]
(string/replace text #" (\S+)$" #(if (in (%1 1) (map :url media)) "" (%1 0))))
(defn parse-tweet [{created-at :created_at
text :full_text
{:keys [media]} :extended_entities
{:keys [screen_name]} :user :as tweet}]
{:created-at (js/Date. created-at)
:text (trim-text (str (chop-tail-media-url text media)
(if (masto/append-screen-name? (mastodon-config config))
(str "\n - " screen_name) "")))
:media-links (keep #(when (= (:type %) "photo") (:media_url_https %)) media)})
(defmulti parse-tumblr-post :type)
(defmethod parse-tumblr-post "text" [{:keys [body date short_url]}]
{:created-at (js/Date. date)
:text (str (trim-text body) "\n\n" short_url)})
(defmethod parse-tumblr-post "photo" [{:keys [caption date photos short_url] :as post}]
{:created-at (js/Date. date)
:text (string/join "\n" [(string/replace caption #"<[^>]*>" "") short_url])
:media-links (mapv #(-> % :original_size :url) photos)})
(defmethod parse-tumblr-post :default [post])
(defn post-tumblrs [last-post-time]
(fn [err response]
(->> response
infra/js->edn
:posts
(mapv parse-tumblr-post)
(masto/post-items
(mastodon-config config)
last-post-time))))
(defn post-tweets [last-post-time]
(fn [error tweets response]
(if error
(infra/exit-with-error error)
(->> (infra/js->edn tweets)
(map parse-tweet)
(masto/post-items
(mastodon-config config)
last-post-time)))))
(defn parse-feed [last-post-time parser [title url]]
(-> (.parseURL parser url)
(.then #(masto/post-items
(mastodon-config config)
last-post-time
(for [{:keys [title isoDate pubDate content link]} (-> % infra/js->edn :items)]
{:created-at (js/Date. (or isoDate pubDate))
:text (str (trim-text title) "\n\n" (twitter/strip-utm link))})))))
(defn tumblr-client [access-keys account]
(try
(tumblr/Blog. account (clj->js access-keys))
(catch js/Error e
(infra/exit-with-error
(str "failed to connect to Tumblr account " account ": " (.-message e))))))
(defn -main []
(masto/get-mastodon-timeline
(mastodon-config config)
(fn [timeline]
(let [last-post-time (-> timeline first :created_at (js/Date.))]
;;post from Twitter
(when-let [twitter-config (:twitter config)]
(let [{:keys [accounts]} twitter-config]
(doseq [account accounts]
(twitter/user-timeline
twitter-config
account
(post-tweets last-post-time)))))
;;post from Tumblr
(when-let [{:keys [access-keys accounts limit]} (:tumblr config)]
(doseq [account accounts]
(let [client (tumblr-client access-keys account)]
(.posts client #js {:limit (or limit 5)} (post-tumblrs last-post-time)))))
;;post from RSS
(when-let [feeds (some-> config :rss)]
(let [parser (rss.)]
(doseq [feed feeds]
(parse-feed last-post-time parser feed))))))))
(set! *main-cli-fn* -main)
(st/instrument 'mastodon-config)
(st/instrument 'mastodon-auth)
(st/instrument 'twitter-auth)
(st/instrument 'transform)

View file

@ -1,8 +1,17 @@
(ns mastodon-bot.infra
(:require
[cljs.reader :as edn]
[clojure.pprint :refer [pprint]]
["fs" :as fs]))
;; Pretty-prints `item` and returns it unchanged, so the call can be dropped
;; into the middle of a threading pipeline.
(defn debug [item]
  (doto item pprint))
;; Pretty-prints only the first element of the collection `item` and returns
;; the whole collection unchanged.
(defn debug-first [item]
  (let [head (first item)]
    (pprint head)
    item))
(defn js->edn [data]
(js->clj data :keywordize-keys true))
@ -10,13 +19,16 @@
(js/console.error error)
(js/process.exit 1))
(defn find-config []
(let [config (or (first *command-line-args*)
(defn find-config [config-location]
(let [config (or config-location
(-> js/process .-env .-MASTODON_BOT_CONFIG)
"config.edn")]
(if (fs/existsSync config)
config
(exit-with-error (str "failed to read config: " config)))))
(defn load-config []
(-> (find-config) (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string))
;; Resolves the configuration file via `find-config`, reads it as UTF-8 text
;; and parses the content as EDN.
(defn load-config [config-location]
  (let [path (find-config config-location)
        content (fs/readFileSync path #js {:encoding "UTF-8"})]
    (edn/read-string content)))

View file

@ -5,166 +5,161 @@
[orchestra.core :refer-macros [defn-spec]]
[clojure.string :as string]
[mastodon-bot.infra :as infra]
["deasync" :as deasync]
["request" :as request]
["mastodon-api" :as mastodon]))
(s/def ::access_token string?)
(s/def ::api_url string?)
(s/def ::account-id string?)
(s/def ::content-filter string?)
(s/def ::keyword-filter string?)
(s/def ::append-screen-name? boolean?)
(s/def ::signature string?)
(s/def ::sensitive? boolean?)
(s/def ::resolve-urls? boolean?)
(s/def ::nitter-urls? boolean?)
(s/def ::visibility string?)
(s/def ::replacements string?)
(s/def ::media-only? boolean?)
(s/def ::visibility #{"direct" "private" "unlisted" "public"})
(s/def ::max-post-length (fn [n] (and
(int? n)
(<= n 500)
(> n 0))))
(def mastodon-auth? (s/keys :req-un [::account-id ::access_token ::api_url]))
(def mastodon-target? (s/keys :opt-un [::max-post-length
::signature
::visibility
::append-screen-name?
::sensitive?
::media-only?]))
(s/def ::content-filters (s/* ::content-filter))
(s/def ::keyword-filters (s/* ::keyword-filter))
(s/def ::mastodon-js-config (s/keys :req-un [::access_token ::api_url]))
(s/def ::mastodon-clj-config (s/keys :req-un [::account-id ::content-filters ::keyword-filters
::max-post-length ::signature ::visibility
::append-screen-name? ::sensitive? ::resolve-urls?
::nitter-urls? ::replacements]))
(def mastodon-config? (s/merge ::mastodon-js-config ::mastodon-clj-config))
(def mastodon-target-defaults {:append-screen-name? false
:visibility "public"
:sensitive? true
:media-only? false
:max-post-length 300})
(defn-spec content-filter-regexes ::content-filters
[mastodon-config mastodon-config?]
(mapv re-pattern (:content-filters mastodon-config)))
(s/def ::created-at any?)
(s/def ::text string?)
(s/def ::media-links string?)
(defn-spec keyword-filter-regexes ::keyword-filters
[mastodon-config mastodon-config?]
(mapv re-pattern (:keyword-filters mastodon-config)))
(def mastodon-output? (s/keys :req-un [::created-at ::text]
:opt-un [::media-links]))
(defn-spec append-screen-name? ::append-screen-name?
[mastodon-config mastodon-config?]
(boolean (:append-screen-name? mastodon-config)))
;; Shortens `text` to at most `max-post-length` characters.
;;
;; - When `max-post-length` is nil, or the text already fits, the text is
;;   returned unchanged.
;; - Otherwise the text is cut at a space boundary and "..." is appended.
;;   Words are added one by one while the joined result, including the
;;   separating spaces, still leaves room for the three-character ellipsis.
;;
;; The accumulator starts as nil rather than "" so that no separator is put in
;; front of the first word; the previous implementation produced a leading
;; space and could exceed the limit by one character.
(defn trim-text [text max-post-length]
  (cond
    (nil? max-post-length)
    text

    (> (count text) max-post-length)
    (let [budget (- max-post-length 3)
          trimmed (reduce
                    (fn [acc word]
                      (let [candidate (if (nil? acc) word (str acc " " word))]
                        (if (> (count candidate) budget)
                          ;; (str nil "...") is "...", which covers the case
                          ;; where not even the first word fits.
                          (reduced (str acc "..."))
                          candidate)))
                    nil
                    (string/split text #" "))]
      ;; nil only when the split produced no words at all
      (or trimmed ""))

    :else text))
(defn-spec max-post-length ::max-post-length
[mastodon-config mastodon-config?]
(:max-post-length mastodon-config))
(defn-spec perform-replacements string?
[mastodon-config mastodon-config?
text string?]
(reduce-kv string/replace text (:replacements mastodon-config)))
[target mastodon-target?]
(:max-post-length target))
(defn-spec mastodon-client any?
[mastodon-config mastodon-config?]
(or (some-> mastodon-config clj->js mastodon.)
(infra/exit-with-error "missing Mastodon client configuration!")))
(defn-spec blocked-content? boolean?
[mastodon-config mastodon-config?
text string?]
(boolean
(or (some #(re-find % text) (content-filter-regexes mastodon-config))
(when (not-empty (keyword-filter-regexes mastodon-config))
(empty? (some #(re-find % text) (keyword-filter-regexes mastodon-config)))))))
[mastodon-auth mastodon-auth?]
(or (some-> mastodon-auth
clj->js
mastodon.)
(infra/exit-with-error "missing Mastodon auth configuration!")))
(defn-spec delete-status any?
[mastodon-config mastodon-config?
[mastodon-auth mastodon-auth?
status-id string?]
(.delete (mastodon-client mastodon-config) (str "statuses/" status-id) #js {}))
(defn resolve-url [[uri]]
(try
(or
(some-> ((deasync request)
#js {:method "GET"
:uri (if (string/starts-with? uri "https://") uri (str "https://" uri))
:followRedirect false})
(.-headers)
(.-location)
(string/replace "?mbid=social_twitter" ""))
uri)
(catch js/Error _ uri)))
(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?")
(defn-spec resolve-urls string?
[mastodon-config mastodon-config?
text string?]
(cond-> text
(:resolve-urls? mastodon-config)
(string/replace shortened-url-pattern resolve-url)
(:nitter-urls? mastodon-config)
(string/replace #"https://twitter.com" "https://nitter.net")))
(defn-spec set-signature string?
[mastodon-config mastodon-config?
text string?]
(if-let [signature (:signature mastodon-config )]
(str text "\n" signature)
text))
(.delete (mastodon-client mastodon-auth) (str "statuses/" status-id) #js {}))
(defn post-status
([mastodon-config status-text]
(post-status mastodon-config status-text nil print))
([mastodon-config status-text media-ids]
(post-status mastodon-config status-text media-ids print))
([mastodon-config status-text media-ids callback]
(let [{:keys [sensitive? signature visibility]} mastodon-config]
(-> (.post (mastodon-client mastodon-config) "statuses"
(clj->js (merge {:status (->> status-text
(resolve-urls mastodon-config)
(perform-replacements mastodon-config)
(set-signature mastodon-config))}
([mastodon-auth target status-text]
(post-status mastodon-auth target status-text nil print))
([mastodon-auth target status-text media-ids]
(post-status mastodon-auth target status-text media-ids print))
([mastodon-auth target status-text media-ids callback]
(let [{:keys [visibility sensitive?]} target]
(-> (.post (mastodon-client mastodon-auth) "statuses"
(clj->js (merge {:status status-text}
(when media-ids {:media_ids media-ids})
(when sensitive? {:sensitive sensitive?})
(when visibility {:visibility visibility}))))
(.then #(-> % callback))))))
(defn-spec post-image any?
[mastodon-config mastodon-config?
[mastodon-auth mastodon-auth?
target mastodon-target?
image-stream any?
description string?
callback fn?]
(-> (.post (mastodon-client mastodon-config) "media"
(-> (.post (mastodon-client mastodon-auth) "media"
#js {:file image-stream :description description})
(.then #(-> % .-data .-id callback))))
(defn post-status-with-images
([mastodon-config status-text urls]
(post-status-with-images mastodon-config status-text urls [] print))
([mastodon-config status-text urls ids]
(post-status-with-images mastodon-config status-text urls ids print))
([mastodon-config status-text [url & urls] ids callback]
([mastodon-auth target status-text urls]
(post-status-with-images mastodon-auth target status-text urls [] print))
([mastodon-auth target status-text urls ids]
(post-status-with-images mastodon-auth target status-text urls ids print))
([mastodon-auth target status-text [url & urls] ids callback]
(if url
(-> request
(.get url)
(.on "response"
(fn [image-stream]
(post-image mastodon-config image-stream status-text
#(post-status-with-images mastodon-config status-text urls (conj ids %) callback)))))
(post-status mastodon-config status-text (not-empty ids) callback))))
(post-image mastodon-auth target image-stream status-text
#(post-status-with-images mastodon-auth
target
status-text
urls
(conj ids %)
callback)))))
(post-status mastodon-auth target status-text (not-empty ids) callback))))
;; Posts every item to Mastodon.
;;
;; Items that carry at least one media link are posted together with their
;; images. Items without media are posted as plain statuses unless the target
;; sets :media-only?, in which case they are skipped.
;;
;; `seq` is used for the media check because an empty collection is truthy;
;; without it, items with an empty :media-links collection would bypass the
;; :media-only? filter.
(defn-spec post-items any?
  [mastodon-auth mastodon-auth?
   target mastodon-target?
   items any?]
  (doseq [{:keys [text media-links]} items]
    (if (seq media-links)
      (post-status-with-images mastodon-auth target text media-links)
      (when-not (:media-only? target)
        (post-status mastodon-auth target text)))))
(defn-spec get-mastodon-timeline any?
[mastodon-config mastodon-config?
[mastodon-auth mastodon-auth?
callback fn?]
(.then (.get (mastodon-client mastodon-config)
(str "accounts/" (:account-id mastodon-config)"/statuses") #js {})
(.then (.get (mastodon-client mastodon-auth)
(str "accounts/" (:account-id mastodon-auth) "/statuses") #js {})
#(let [response (-> % .-data infra/js->edn)]
(if-let [error (::error response)]
(infra/exit-with-error error)
(callback response)))))
(defn-spec post-items any?
[mastodon-config mastodon-config?
last-post-time any?
items any?]
(doseq [{:keys [text media-links]}
(->> items
(remove #(blocked-content? mastodon-config (:text %)))
(filter #(> (:created-at %) last-post-time)))]
(if media-links
(post-status-with-images mastodon-config text media-links)
(when-not (::media-only? mastodon-config)
(post-status mastodon-config text)))))
;; Converts an intermediate item into the map that is posted to Mastodon.
;;
;; The target settings are merged over `mastodon-target-defaults`. The final
;; text is the trimmed item text followed by a suffix built from, in order,
;; the untrimmed text (prefixed with a space), the screen name (prefixed with
;; "\n#", only when :append-screen-name? is set) and the signature (prefixed
;; with "\n"). The length of the suffix is subtracted from the maximum post
;; length before the item text is trimmed.
(defn-spec intermediate-to-mastodon mastodon-output?
  [target mastodon-target?
   input any?]
  (let [settings (merge mastodon-target-defaults target)
        {:keys [created-at text media-links screen_name untrimmed-text]} input
        {:keys [signature append-screen-name?]} settings
        suffix (str (if (some? untrimmed-text)
                      (str " " untrimmed-text)
                      "")
                    (if append-screen-name?
                      (str "\n#" screen_name)
                      "")
                    (if (some? signature)
                      (str "\n" signature)
                      ""))
        budget (- (max-post-length settings) (count suffix))]
    {:created-at created-at
     :text (str (trim-text text budget) suffix)
     :reblogged true
     :media-links media-links}))

View file

@ -0,0 +1,40 @@
(ns mastodon-bot.rss-api
(:require
[clojure.spec.alpha :as s]
[clojure.spec.test.alpha :as st]
[orchestra.core :refer-macros [defn-spec]]
["rss-parser" :as rss]
[mastodon-bot.infra :as infra]
))
(s/def ::feed (s/cat :name string? :url string?))
(s/def ::feeds (s/coll-of ::feed))
(def rss-source? (s/keys :req-un [::feeds]))
(s/def ::title string?)
(s/def ::content string?)
(s/def ::link string?)
(s/def ::author string?)
(s/def ::isoDate string?)
(s/def ::pubDate string?)
(s/def ::feed-item (s/keys :req-un [::title ::content ::link]
:opt-un [::author ::isoDate ::pubDate]))
;; Creates a new parser instance from the "rss-parser" module.
(defn-spec rss-client any?
  []
  (new rss))
;; Turns one feed item into a map with :created-at and :text.
;; The timestamp is taken from :isoDate, falling back to :pubDate; the text is
;; the item title and link separated by a blank line.
(defn-spec parse-feed any?
  [item ::feed-item]
  (let [timestamp (or (:isoDate item) (:pubDate item))]
    {:created-at (js/Date. timestamp)
     :text (str (:title item) "\n\n" (:link item))}))
;; Fetches and parses the feed at `url` and passes the parsed result to
;; `callback`. The url is printed before the request is started.
;;
;; The value returned is whatever `.then` yields (a promise-like object, not a
;; map), so the return spec is `any?`; with `map?` every call would fail as
;; soon as the function is instrumented.
(defn-spec get-feed any?
  [url string?
   callback fn?]
  (print url)
  (-> (.parseURL (rss-client) url)
      (.then callback)))

View file

@ -0,0 +1,188 @@
(ns mastodon-bot.transform
(:require
[clojure.spec.alpha :as s]
[clojure.spec.test.alpha :as st]
[orchestra.core :refer-macros [defn-spec]]
[clojure.string :as string]
[mastodon-bot.infra :as infra]
[mastodon-bot.mastodon-api :as masto]
[mastodon-bot.twitter-api :as twitter]
[mastodon-bot.rss-api :as rss]
[mastodon-bot.tumblr-api :as tumblr]
["deasync" :as deasync]
["request" :as request]))
(s/def ::created-at any?)
(s/def ::text string?)
(s/def ::untrimmed-text string?)
(s/def ::media-links string?)
(s/def ::screen_name string?)
(def intermediate? (s/keys :req-un [::created-at ::text ::screen_name]
:opt-un [::media-links ::untrimmed-text]))
(s/def ::source-type #{:twitter :rss :tumblr})
(s/def ::resolve-urls? boolean?)
(s/def ::content-filter string?)
(s/def ::content-filters (s/* ::content-filter))
(s/def ::keyword-filter string?)
(s/def ::keyword-filters (s/* ::keyword-filter))
(s/def ::replacements any?)
(defmulti source-type :source-type)
(defmethod source-type :twitter [_]
(s/merge (s/keys :req-un[::source-type]) twitter/twitter-source?))
(defmethod source-type :rss [_]
(s/merge (s/keys :req-un [::source-type]) rss/rss-source?))
(defmethod source-type :tumblr [_]
(s/merge (s/keys :req-un [::source-type]) tumblr/tumblr-source?))
(s/def ::source (s/multi-spec source-type ::source-type))
(s/def ::target-type #{:mastodon})
(defmulti target-type :target-type)
(defmethod target-type :mastodon [_]
(s/merge (s/keys :req-un [::target-type]) masto/mastodon-target?))
(s/def ::target (s/multi-spec target-type ::target-type))
(s/def ::transformation (s/keys :req-un [::source ::target]
:opt-un [::resolve-urls? ::content-filters ::keyword-filters
::replacements]))
(def transformations? (s/* ::transformation))
;; Replacement function for `string/replace` over `shortened-url-pattern`.
;; It receives the match vector and uses only the full match as `uri`.
;;
;; A synchronous GET request without redirect following is issued and the
;; `location` response header, with the "?mbid=social_twitter" marker removed,
;; is returned. When there is no such header, or the request throws, the
;; original `uri` is returned unchanged.
;;
;; A scheme is only prepended when the match has none: the pattern also
;; matches "http://" links, and prefixing those with "https://" produced the
;; invalid "https://http://..." so they were never resolved.
(defn resolve-url [[uri]]
  (try
    (or
      (some-> ((deasync request)
               #js {:method "GET"
                    :uri (if (re-find #"^https?://" uri) uri (str "https://" uri))
                    :followRedirect false})
              (.-headers)
              (.-location)
              (string/replace "?mbid=social_twitter" ""))
      uri)
    (catch js/Error _ uri)))
;; Regex used to find URLs inside post text. It accepts an optional http(s)
;; scheme (the only capturing group), optional "user:password@" credentials,
;; then either a dotted IPv4 address (with negative look-aheads excluding the
;; 10.x, 127.x, 169.254.x, 192.168.x and 172.16-31.x ranges) or a dotted host
;; name ending in a TLD of at least two letters, followed by an optional port
;; and an optional path/query/fragment.
(def shortened-url-pattern #"(https?://)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?")
;; Rewrites every URL found in the :text of `input` through `resolve-url`
;; when `resolve-urls?` is truthy; otherwise returns `input` unchanged.
(defn-spec intermediate-resolve-urls intermediate?
  [resolve-urls? ::resolve-urls?
   input intermediate?]
  (cond-> input
    resolve-urls? (update :text string/replace shortened-url-pattern resolve-url)))
;; Compiles the configured :content-filters strings into regex patterns.
;; Returns a vector of patterns (empty when no filters are configured).
;;
;; The return spec describes compiled patterns; the ::content-filters spec
;; describes the *configured strings* and would reject this function's own
;; result when return values are checked.
(defn-spec content-filter-regexes (s/coll-of regexp?)
  [transformation ::transformation]
  (mapv re-pattern (:content-filters transformation)))
;; Compiles the configured :keyword-filters strings into regex patterns.
;; Returns a vector of patterns (empty when no filters are configured).
;;
;; The return spec describes compiled patterns; the ::keyword-filters spec
;; describes the *configured strings* and would reject this function's own
;; result when return values are checked.
(defn-spec keyword-filter-regexes (s/coll-of regexp?)
  [transformation ::transformation]
  (mapv re-pattern (:keyword-filters transformation)))
;; Decides whether `text` must be dropped for this transformation.
;; A text is blocked when
;;   * any content filter matches it, or
;;   * keyword filters are configured and none of them matches it.
;; Always returns a boolean.
(defn-spec blocked-content? boolean?
  [transformation ::transformation
   text string?]
  ;; Compile each filter list once instead of once per check.
  (let [content-filters (content-filter-regexes transformation)
        keyword-filters (keyword-filter-regexes transformation)]
    (boolean
     (or (some #(re-find % text) content-filters)
         ;; `not-any?` instead of `(empty? (some ...))`: a keyword filter that
         ;; matches with an empty string is still a match and must not block.
         (and (seq keyword-filters)
              (not-any? #(re-find % text) keyword-filters))))))
;; Applies every entry of the transformation's :replacements map to the
;; :text of `input`: each key is replaced by its value via string/replace.
;; Without replacements the text is left as is.
(defn-spec perform-replacements intermediate?
  [transformation ::transformation
   input intermediate?]
  (let [replacements (:replacements transformation)
        replace-all (fn [text]
                      (reduce-kv (fn [acc match replacement]
                                   (string/replace acc match replacement))
                                 text
                                 replacements))]
    (update input :text replace-all)))
;; Builds the callback handed to the Twitter timeline request.
;; The callback exits through infra/exit-with-error when it receives an
;; error; otherwise it parses the tweets, keeps those newer than
;; `last-post-time`, drops blocked content, optionally resolves URLs, applies
;; the nitter rewrite and the replacements, converts each item for the target
;; and passes the result to masto/post-items.
(defn-spec post-tweets-to-mastodon any?
  [mastodon-auth masto/mastodon-auth?
   transformation ::transformation
   last-post-time any?]
  (let [source (:source transformation)
        target (:target transformation)
        resolve-urls? (:resolve-urls? transformation)
        newer? (fn [item] (> (:created-at item) last-post-time))
        blocked? (fn [item] (blocked-content? transformation (:text item)))]
    (fn [error tweets _response]
      (if error
        (infra/exit-with-error error)
        (->> (infra/js->edn tweets)
             (map twitter/parse-tweet)
             (filter newer?)
             (remove blocked?)
             (map (partial intermediate-resolve-urls resolve-urls?))
             (map (partial twitter/nitter-url source))
             (map (partial perform-replacements transformation))
             (map (partial masto/intermediate-to-mastodon target))
             (masto/post-items mastodon-auth target))))))
;; Requests the timeline of every account listed in the transformation's
;; source and registers a fresh post-tweets-to-mastodon callback for each
;; request. Returns nil.
(defn-spec tweets-to-mastodon any?
  [mastodon-auth masto/mastodon-auth?
   twitter-auth twitter/twitter-auth?
   transformation ::transformation
   last-post-time any?]
  (let [source (:source transformation)
        fetch-account (fn [account]
                        (twitter/user-timeline
                         twitter-auth
                         source
                         account
                         (post-tweets-to-mastodon mastodon-auth
                                                  transformation
                                                  last-post-time)))]
    (run! fetch-account (:accounts source))))
;; Builds the callback handed to the Tumblr posts request.
;; The callback exits through infra/exit-with-error when it receives an
;; error; otherwise it parses the :posts of the payload, keeps those newer
;; than `last-post-time`, drops blocked content, applies the replacements,
;; converts each item for the target and passes the result to
;; masto/post-items.
(defn-spec post-tumblr-to-mastodon any?
  [mastodon-auth masto/mastodon-auth?
   transformation ::transformation
   last-post-time any?]
  (let [target (:target transformation)
        newer? (fn [post] (> (:created-at post) last-post-time))
        blocked? (fn [post] (blocked-content? transformation (:text post)))]
    (fn [error payload _response]
      (if error
        (infra/exit-with-error error)
        (->> (:posts (infra/js->edn payload))
             (mapv tumblr/parse-tumblr-post)
             (filter newer?)
             (remove blocked?)
             (map (partial perform-replacements transformation))
             (map (partial masto/intermediate-to-mastodon target))
             (masto/post-items mastodon-auth target))))))
;; Requests the latest posts of every Tumblr account listed in the
;; transformation's source and registers a post-tumblr-to-mastodon callback
;; for each request. At most :limit posts are requested per account
;; (5 when no limit is configured).
(defn-spec tumblr-to-mastodon any?
  [mastodon-auth masto/mastodon-auth?
   tumblr-auth tumblr/tumblr-auth?
   transformation ::transformation
   last-post-time any?]
  ;; :accounts and :limit live under :source (see the ::transformation spec
  ;; and the Twitter counterpart); reading them from the transformation
  ;; itself always yielded nil, so no account was ever polled.
  (let [{:keys [accounts limit]} (:source transformation)]
    (doseq [account accounts]
      (let [client (tumblr/tumblr-client tumblr-auth account)]
        (.posts client
                #js {:limit (or limit 5)}
                (post-tumblr-to-mastodon
                 mastodon-auth
                 transformation
                 last-post-time))))))
;; Builds the callback handed to the RSS feed request.
;; The callback parses the :items of the payload, keeps those newer than
;; `last-post-time`, drops blocked content, optionally resolves URLs, applies
;; the replacements, converts each item for the target and passes the result
;; to masto/post-items.
(defn-spec post-rss-to-mastodon any?
  [mastodon-auth masto/mastodon-auth?
   transformation ::transformation
   last-post-time any?]
  (let [target (:target transformation)
        resolve-urls? (:resolve-urls? transformation)
        newer? (fn [item] (> (:created-at item) last-post-time))
        blocked? (fn [item] (blocked-content? transformation (:text item)))]
    (fn [payload]
      (->> (:items (infra/js->edn payload))
           (map rss/parse-feed)
           (filter newer?)
           (remove blocked?)
           (map (partial intermediate-resolve-urls resolve-urls?))
           (map (partial perform-replacements transformation))
           (map (partial masto/intermediate-to-mastodon target))
           (masto/post-items mastodon-auth target)))))
;; Fetches every feed listed under :feeds of the transformation's source.
;; Each feed entry is a [name url] pair; only the URL is used. A fresh
;; post-rss-to-mastodon callback is registered per feed. Returns nil.
(defn-spec rss-to-mastodon any?
  [mastodon-auth masto/mastodon-auth?
   transformation ::transformation
   last-post-time any?]
  (let [feeds (:feeds (:source transformation))]
    (doseq [[_feed-name feed-url] feeds]
      (rss/get-feed
       feed-url
       (post-rss-to-mastodon mastodon-auth transformation last-post-time)))))

View file

@ -0,0 +1,44 @@
(ns mastodon-bot.tumblr-api
(:require
[clojure.spec.alpha :as s]
[clojure.spec.test.alpha :as st]
[orchestra.core :refer-macros [defn-spec]]
[clojure.string :as string]
[mastodon-bot.infra :as infra]
["tumblr" :as tumblr]
))
;; Specs for the Tumblr credentials map. All four keys are required strings;
;; tumblr-client converts the map with clj->js and passes it to the Tumblr
;; blog constructor.
(s/def ::consumer_key string?)
(s/def ::consumer_secret string?)
(s/def ::token string?)
(s/def ::token_secret string?)
(def tumblr-auth? (s/keys :req-un [::consumer_key ::consumer_secret ::token
::token_secret]))
;; Specs for a Tumblr source entry.
;; ::limit is the number of posts to request per account.
(s/def ::limit pos?)
(s/def ::account string?)
(s/def ::accounts (s/* ::account))
;; :limit is optional: the sample configuration documents it as optional with
;; a default of 5, and the consumer falls back to 5 when it is missing, so
;; requiring it here rejected valid configurations.
(def tumblr-source? (s/keys :req-un [::accounts]
                            :opt-un [::limit]))
;; Creates a Tumblr blog client for `account` using the given credentials.
;; If constructing the client throws, the process is terminated through
;; infra/exit-with-error with a message naming the account.
(defn-spec tumblr-client any?
  [access-keys tumblr-auth?
   account string?]
  (try
    (let [credentials (clj->js access-keys)]
      (tumblr/Blog. account credentials))
    (catch js/Error err
      (infra/exit-with-error
       (str "failed to connect to Tumblr account " account ": " (.-message err))))))
;; Converts a Tumblr post into an intermediate item, dispatching on the
;; post's :type.
(defmulti parse-tumblr-post :type)

;; Text posts: the body becomes the text, the short URL is kept separately
;; as :untrimmed-text.
(defmethod parse-tumblr-post "text" [post]
  (let [{:keys [body date short_url]} post]
    {:created-at (js/Date. date)
     :text body
     :untrimmed-text (str "\n\n" short_url)}))

;; Photo posts: the caption with markup tags removed, followed by the short
;; URL on its own line; the original-size URL of every photo is collected as
;; media link.
(defmethod parse-tumblr-post "photo" [post]
  (let [{:keys [caption date photos short_url]} post
        plain-caption (string/replace caption #"<[^>]*>" "")]
    {:created-at (js/Date. date)
     :text (string/join "\n" [plain-caption short_url])
     :media-links (mapv (comp :url :original_size) photos)}))

;; Any other post type yields nil.
(defmethod parse-tumblr-post :default [_post]
  nil)

View file

@ -12,33 +12,58 @@
(s/def ::consumer_secret string?)
(s/def ::access_token_key string?)
(s/def ::access_token_secret string?)
(s/def ::access-keys (s/keys :req-un [::consumer_key ::consumer_secret ::access_token_key
(def twitter-auth? (s/keys :req-un [::consumer_key ::consumer_secret ::access_token_key
::access_token_secret]))
;; Specs for the keys of a Twitter source entry: the three boolean flags and
;; the list of account names to mirror.
(s/def ::include-rts? boolean?)
(s/def ::include-replies? boolean?)
(s/def ::nitter-urls? boolean?)
(s/def ::account string?)
(s/def ::accounts (s/* ::account))
(def twitter-config? (s/keys :req-un [::access-keys ::include-rts? ::include-replies?]))
(def twitter-source? (s/keys :req-un [::include-rts? ::include-replies? ::accounts]
:opt-un [::nitter-urls?]))
;; Creates a Twitter client from the given credentials map.
;; If constructing the client throws, the process is terminated through
;; infra/exit-with-error.
(defn-spec twitter-client any?
  [twitter-auth twitter-auth?]
  (try
    (let [credentials (clj->js twitter-auth)]
      (twitter. credentials))
    (catch js/Error err
      (infra/exit-with-error
       (str "failed to connect to Twitter: " (.-message err))))))
;; Cuts a link off at the first "?utm" occurrence, returning the part in
;; front of it. Links without "?utm" are returned unchanged.
(defn strip-utm [news-link]
  (let [[base] (string/split news-link #"\?utm")]
    base))
(defn-spec twitter-client any?
[twitter-config twitter-config?]
(let [{:keys [access-keys]} twitter-config]
(try
(twitter. (clj->js access-keys))
(catch js/Error e
(infra/exit-with-error
(str "failed to connect to Twitter: " (.-message e)))))))
;; Returns true when `needle` equals some element of `haystack`,
;; otherwise nil.
(defn in [needle haystack]
  (some #(= needle %) haystack))
;; If the text ends in a link to the media (which is uploaded anyway),
;; chop it off instead of including the link in the toot.
;; Only the last whitespace-free token (preceded by a space) is considered;
;; it is removed together with that space when it equals the :url of one of
;; the `media` entries.
(defn chop-tail-media-url [text media]
  (let [media-urls (map :url media)]
    (string/replace text
                    #" (\S+)$"
                    (fn [groups]
                      (let [whole (groups 0)
                            tail (groups 1)]
                        (if (in tail media-urls) "" whole))))))
;; Converts a tweet map into an intermediate item: creation date, full text
;; with a trailing media link chopped off, the author's screen name, and the
;; https URLs of all attached media of type "photo".
(defn parse-tweet [tweet]
  (let [{:keys [created_at full_text extended_entities user]} tweet
        media (:media extended_entities)
        photo-url (fn [entry]
                    (when (= (:type entry) "photo")
                      (:media_url_https entry)))]
    {:created-at (js/Date. created_at)
     :text (chop-tail-media-url full_text media)
     :screen_name (:screen_name user)
     :media-links (keep photo-url media)}))
;; Rewrites links to twitter.com in the tweet text so they point to
;; nitter.net when the source has :nitter-urls? enabled.
;; Returns the tweet unchanged otherwise. The previous `when` returned nil in
;; that case, which violated the `map?` return spec and made callers that map
;; this function over tweets lose every tweet's content.
(defn-spec nitter-url map?
  [source twitter-source?
   parsed-tweet map?]
  (if (:nitter-urls? source)
    ;; The dot is escaped so that only the literal host name is rewritten.
    (update parsed-tweet :text #(string/replace % #"https://twitter\.com" "https://nitter.net"))
    parsed-tweet))
(defn-spec user-timeline any?
[twitter-config twitter-config?
[twitter-auth twitter-auth?
source twitter-source?
account ::account
callback fn?]
(let [{:keys [include-rts? include-replies?]} twitter-config]
(.get (twitter-client twitter-config)
(let [{:keys [include-rts? include-replies?]} source]
(.get (twitter-client twitter-auth)
"statuses/user_timeline"
#js {:screen_name account
:tweet_mode "extended"

View file

@ -1,6 +1,4 @@
#!/usr/bin/env lumo
(ns mastodon-bot.core_test
(ns mastodon-bot.core-test
(:require
[cljs.test :refer-macros [deftest is testing run-tests]]
[cljs.reader :as edn]
@ -8,31 +6,20 @@
[mastodon-bot.core :as core]
))
(deftest test-read-config
(is (= 300 core/max-post-length)))
;; (deftest test-read-config
;; (is (= 300 core/max-post-length)))
(defn readfile [filename]
(-> filename (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string))
;; (defn readfile [filename]
;; (-> filename (fs/readFileSync #js {:encoding "UTF-8"}) edn/read-string))
(deftest test-remove-link-to-image
(is (=
"Mensen vragen om meer foto's in SPAMSPAMSPAM, dus bij deze achteraf de nieuwe kasten voor de projectenkast en de bookcrossingzone. Te vinden direct bij binnenkomst op de eerste en tweede verdieping."
(:text (core/parse-tweet (readfile "testdata/twitter/tweet-with-link-to-image.edn")))
)))
;; (deftest test-remove-link-to-image
;; (is (=
;; "Mensen vragen om meer foto's in SPAMSPAMSPAM, dus bij deze achteraf de nieuwe kasten voor de projectenkast en de bookcrossingzone. Te vinden direct bij binnenkomst op de eerste en tweede verdieping."
;; (:text (core/parse-tweet (readfile "testdata/twitter/tweet-with-link-to-image.edn")))
;; )))
(deftest test-parse-normal-tweet-text
(is (=
"Daar is 'ie dan! SPAMSPAMSPAM editie 2! Met een samenvatting van wat er in deze eerste twee maanden van 2020 gebeurd en gedaan is binnen @hack42. Lees het via: \nhttps://t.co/O1YzlWTFU3 #hackerspace #nieuws #arnhem #nuarnhem"
(:text (core/parse-tweet (readfile "testdata/twitter/normal-tweet.edn")))
)))
(deftest test-replacements
(is (=
"💠 Check out what has been going on during March in the world of @ReproBuilds! 💠 https://t.co/k6NsSO115z @opensuse@fosstodon.org @conservancy@mastodon.technology @PrototypeFund@mastodon.social @debian@fosstodon.org "
(:text (core/perform-replacements (core/parse-tweet (readfile "testdata/twitter/tweet-mentions.edn"))))
)))
(cljs.test/run-tests)
; Don't run core's -main when testing
(set! *main-cli-fn* ())
;; (deftest test-parse-normal-tweet-text
;; (is (=
;; "Daar is 'ie dan! SPAMSPAMSPAM editie 2! Met een samenvatting van wat er in deze eerste twee maanden van 2020 gebeurd en gedaan is binnen @hack42. Lees het via: \nhttps://t.co/O1YzlWTFU3 #hackerspace #nieuws #arnhem #nuarnhem"
;; (:text (core/parse-tweet (readfile "testdata/twitter/normal-tweet.edn")))
;; )))

View file

@ -0,0 +1,53 @@
(ns mastodon-bot.mastodon-api-test
(:require
[cljs.test :refer-macros [deftest is testing run-tests]]
[clojure.spec.alpha :as s]
[mastodon-bot.mastodon-api :as sut]
))
;; Intermediate item fixture shaped like a parsed RSS entry: it carries only
;; :created-at and :text (title, blank line, link).
(def intermediate-rss-item {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"})
;; The screen name must not show up in the post text, neither when
;; :append-screen-name? is explicitly false nor when the key is missing.
;; In both cases the expected text ends with the configured signature.
(deftest should-not-append-screen-name
;; Case 1: :append-screen-name? false.
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC
https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/
#rssbot"
:reblogged true, :media-links nil}
(sut/intermediate-to-mastodon {:target-type :mastodon
:append-screen-name? false
:max-post-length 500
:signature "#rssbot"}
intermediate-rss-item)))
;; Case 2: :append-screen-name? omitted from the target.
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC
https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/
#rssbot"
:reblogged true, :media-links nil}
(sut/intermediate-to-mastodon {:target-type :mastodon
:max-post-length 500
:signature "#rssbot"}
intermediate-rss-item))))
;; With a target that configures neither :max-post-length nor :signature the
;; expected text is the item's title and link, with nothing appended.
(deftest should-not-trim
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC
https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"
:reblogged true, :media-links nil}
(sut/intermediate-to-mastodon {:target-type :mastodon}
intermediate-rss-item))))
;; When the target has no :signature, the expected text contains no
;; signature line.
(deftest should-not-append-signature
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC
https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"
:reblogged true, :media-links nil}
(sut/intermediate-to-mastodon {:target-type :mastodon
:append-screen-name? false
:max-post-length 500}
intermediate-rss-item))))

View file

@ -0,0 +1,44 @@
(ns mastodon-bot.rss-api-test
(:require
[cljs.test :refer-macros [deftest is testing run-tests]]
[clojure.spec.alpha :as s]
[mastodon-bot.rss-api :as sut]
))
;; An RSS source with :feeds given as a vector of [name url] pairs must
;; satisfy the rss-source? spec.
(deftest test-spec
(is (s/valid? sut/rss-source?
{:feeds [["correctiv-blog" "https://news.correctiv.org/news/rss.php"]]}
)))
;; Feed item fixture in the shape of a reddit entry: :pubDate and :isoDate
;; both hold the same ISO timestamp.
(def reddit-feed-item {:title "Datahike release 0.3.1"
:link
"https://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/"
:pubDate "2020-06-26T00:36:48.000Z"
:author "/u/yogthos"
:content
"&#32; submitted by &#32; <a href=\"https://www.reddit.com/user/yogthos\"> /u/yogthos </a> <br/> <span><a href=\"https://github.com/replikativ/datahike/releases/tag/v0.3.1\">[link]</a></span> &#32; <span><a href=\"https://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/\">[comments]</a></span>"
:contentSnippet "submitted by /u/yogthos [link] [comments]"
:id "t3_hfxotu"
:isoDate "2020-06-26T00:36:48.000Z"})
;; Feed item fixture in the shape of an hnrss.org entry: :pubDate is an
;; RFC-822 style date while :isoDate holds the ISO timestamp.
(def hnrss-org-feed-item {:creator "seacaster"
:isoDate "2020-06-26T12:17:33.000Z"
:content
"\n<p>Article URL: <a href=\"https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/\">https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/</a></p>\n<p>Comments URL: <a href=\"https://news.ycombinator.com/item?id=23651117\">https://news.ycombinator.com/item?id=23651117</a></p>\n<p>Points: 1</p>\n<p># Comments: 0</p>\n"
:comments "https://news.ycombinator.com/item?id=23651117"
:dc:creator "seacaster"
:pubDate "Fri, 26 Jun 2020 12:17:33 +0000"
:contentSnippet
"Article URL: https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/\nComments URL: https://news.ycombinator.com/item?id=23651117\nPoints: 1\n# Comments: 0"
:title "Taking Theatre Online with WebGL and WebRTC"
:link
"https://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"
:guid "https://news.ycombinator.com/item?id=23651117"})
;; parse-feed must turn both feed item fixtures into an intermediate item
;; whose :created-at is the item's timestamp and whose :text is the title,
;; a blank line and the link.
(deftest items-should-be-parsed
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"}
(sut/parse-feed hnrss-org-feed-item)))
(is (= {:created-at #inst "2020-06-26T00:36:48.000-00:00",
:text "Datahike release 0.3.1\n\nhttps://www.reddit.com/r/Clojure/comments/hfxotu/datahike_release_031/"}
(sut/parse-feed reddit-feed-item))))

View file

@ -0,0 +1,14 @@
(ns mastodon-bot.transform-rss-test
(:require
[cljs.test :refer-macros [deftest is testing run-tests]]
[clojure.spec.alpha :as s]
[mastodon-bot.transform :as sut]
))
;; Intermediate item fixture shaped like a parsed RSS entry: it carries only
;; :created-at and :text (title, blank line, link).
(def intermediate-rss-item {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"})
;; With resolve-urls? set to false the item must come back unchanged.
(deftest should-not-resolve-urls
(is (= {:created-at #inst "2020-06-26T12:17:33.000-00:00"
:text "Taking Theatre Online with WebGL and WebRTC\n\nhttps://chrisuehlinger.com/blog/2020/06/16/unshattering-the-audience-building-theatre-on-the-web-in-2020/"}
(sut/intermediate-resolve-urls false intermediate-rss-item))))

View file

@ -0,0 +1,41 @@
(ns mastodon-bot.transform-test
(:require
[cljs.test :refer-macros [deftest is testing run-tests]]
[clojure.spec.alpha :as s]
[cljs.reader :as edn]
["fs" :as fs]
[mastodon-bot.core :as core]
[mastodon-bot.twitter-api :as twitter]
[mastodon-bot.transform :as sut]
))
;; The transformations? spec must accept an empty list as well as a fully
;; populated Twitter-to-Mastodon transformation.
(deftest test-spec
;; No transformations configured.
(is (s/valid? sut/transformations?
[]))
;; One transformation with every source, target and filter option set.
(is (s/valid? sut/transformations?
[{:source {:source-type :twitter
:include-replies? false
:include-rts? true
:nitter-urls? true
:accounts ["an-twitter-account"]}
:target {:target-type :mastodon
:append-screen-name? true
:media-only? false
:max-post-length 500
:visibility "unlisted"
:sensitive? true
:signature "my-bot"}
:resolve-urls? true
:content-filters [".*bannedsite.*"]
:keyword-filters [".*"]}])))
;; Reads the file `filename` synchronously as UTF-8 text and parses its
;; content as EDN.
(defn readfile [filename]
  (let [content (fs/readFileSync filename #js {:encoding "UTF-8"})]
    (edn/read-string content)))
;; Sample configuration, read from "test.edn" when this namespace is loaded.
(def testconfig (readfile "test.edn"))
;; Applies the replacements of the first transformation in the sample
;; configuration to the parsed "tweet-mentions" fixture and compares the
;; resulting text with the expected, rewritten mentions.
(deftest test-replacements
(is (=
"💠 Check out what has been going on during March in the world of @ReproBuilds! 💠 https://t.co/k6NsSO115z @opensuse@fosstodon.org @conservancy@mastodon.technology @PrototypeFund@mastodon.social @debian@fosstodon.org "
(:text (sut/perform-replacements (first (:transform testconfig)) (twitter/parse-tweet (readfile "testdata/twitter/tweet-mentions.edn"))))
)))

140
test.edn
View file

@ -1,69 +1,87 @@
{;; add Twitter config to mirror Twitter accounts
:twitter {:access-keys
{:consumer_key "XXXX"
:consumer_secret "XXXX"
:access_token_key "XXXX"
:access_token_secret "XXXX"}
;; optional, defaults to false
:include-replies? false
;; optional, defaults to false
:include-rts? false
;; accounts you wish to mirror
:accounts ["arstechnica" "WIRED"]}
;; add Tumblr config to mirror Tumblr accounts
:tumblr {:access-keys
{:consumer_key "XXXX"
:consumer_secret "XXXX"
:token "XXXX"
:token_secret "XXXX"}
;; optional limit for number of posts to retrieve, default: 5
:limit 10
:accounts ["cyberpunky.tumblr.com" "scipunk.tumblr.com"]}
;; add RSS config to follow feeds
:rss {"Hacker News" "https://hnrss.org/newest"
"r/Clojure" "https://www.reddit.com/r/clojure/.rss"}
:mastodon {:access_token "XXXX"
;; account number you see when you log in and go to your profile
;; e.g: https://mastodon.social/web/accounts/294795
:account-id "XXXX"
:api_url "https://botsin.space/api/v1/"
{:auth {;; add Twitter config to mirror Twitter accounts
:twitter {:consumer_key "XXXX"
:consumer_secret "XXXX"
:access_token_key "XXXX"
:access_token_secret "XXXX"}
:mastodon {:access_token "XXXX"
;; account number you see when you log in and go to your profile
;; e.g: https://mastodon.social/web/accounts/294795
:account-id "XXXX"
:api_url "https://botsin.space/api/v1/"}
:tumblr {:consumer_key "XXXX"
:consumer_secret "XXXX"
:token "XXXX"
:token_secret "XXXX"}}
:transform
[{:source {:source-type :twitter
;; optional, defaults to false
:include-replies? false
;; optional, defaults to false
:include-rts? false
;; Replace Twitter links by Nitter
:nitter-urls? false
;; accounts you wish to mirror
:accounts ["arstechnica" "WIRED"]}
:target {:target-type :mastodon
;; optional flag specifying whether the name of the account
;; will be appended in the post, defaults to false
:append-screen-name? false
;; optional visibility flag: direct, private, unlisted, public
;; defaults to public
:visibility "unlisted"
;; optional boolean to mark content as sensitive
:sensitive? true
;; optional boolean defaults to false
;; only sources containing media will be posted when set to true
:media-only? true
;; optional visibility flag: direct, private, unlisted, public
;; defaults to public
:visibility "unlisted"
;; optional limit for the post length
:max-post-length 300
;; optional flag specifying whether the name of the account
;; will be appended in the post, defaults to false
:append-screen-name? false
;; optional signature for posts
:signature "#newsbot"
;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false
:resolve-urls? true
;; optional content filter regexes
;; any posts matching the regexes will be filtered out
:content-filters [".*bannedsite.*"]
;; optional keyword filter regexes
;; any posts not matching the regexes will be filtered out
:keyword-filters [".*clojure.*"]
;; optional replacements
:replacements {"@openSUSE " "@opensuse@fosstodon.org ",
"@archlinux " "",
"@lolamby " "",
"@conservancy " "@conservancy@mastodon.technology ",
"@prototypefund " "@PrototypeFund@mastodon.social ",
"@coreboot_org " "",
"@OpenTechFund " "",
"@OpenWrtSummit " "",
"@OpenMirage " "",
"@debian " "@debian@fosstodon.org ",
"@nixos_org " "",
"@lwnnet " "",
"@guixhpc" ""}
;; Replace Twitter links by Nitter
:nitter-urls? false}}
:signature "#newsbot"}
;; optionally try to resolve URLs in posts to skip URL shorteners
;; defaults to false
:resolve-urls? true
;; optional content filter regexes
;; any posts matching the regexes will be filtered out
:content-filters [".*bannedsite.*"]
;; optional keyword filter regexes
;; any posts not matching the regexes will be filtered out
:keyword-filters [".*clojure.*"]
;; optional replacements
;; When the strings on the left side of this map are encountered in the source,
;; they are replaced with the string on the right side of the map:
:replacements {
"@openSUSE " "@opensuse@fosstodon.org ",
"@archlinux " "",
"@lolamby " "",
"@conservancy " "@conservancy@mastodon.technology ",
"@prototypefund " "@PrototypeFund@mastodon.social ",
"@coreboot_org " "",
"@OpenTechFund " "",
"@OpenWrtSummit " "",
"@OpenMirage " "",
"@debian " "@debian@fosstodon.org ",
"@nixos_org " "",
"@lwnnet " "",
"@guixhpc" ""}}
{:source {:source-type :rss
;; add RSS config to follow feeds
:feeds [["Hacker News" "https://hnrss.org/newest"]
["r/Clojure" "https://www.reddit.com/r/clojure/.rss"]]}
:target {:target-type :mastodon
:append-screen-name? false
:signature "#rssbot"}
:resolve-urls? true
:content-filters [".*bannedsite.*"]
:keyword-filters [".*clojure.*"]
:replacements nil}
{:source {:source-type :tumblr
;; optional limit for number of posts to retrieve, default: 5
:limit 10
:accounts ["cyberpunky.tumblr.com" "scipunk.tumblr.com"]}
:target {:target-type :mastodon
:signature "#tumblrbot"}}
]}

View file

@ -1,2 +1,29 @@
{:in_reply_to_screen_name nil, :is_quote_status false, :coordinates nil, :in_reply_to_status_id_str nil, :place nil, :possibly_sensitive false, :geo nil, :in_reply_to_status_id nil, :entities {:hashtags [{:text "hackerspace", :indices [185 197]} {:text "nieuws", :indices [198 205]} {:text "arnhem", :indices [206 213]} {:text "nuarnhem", :indices [214 223]}], :symbols [], :user_mentions [{:screen_name "Hack42", :name "Hackerspace Arnhem", :id 91565087, :id_str "91565087", :indices [137 144]}], :urls [{:url "https://t.co/O1YzlWTFU3", :expanded_url "https://mailchi.mp/6591af748e3e/spamspamspam2", :display_url "mailchi.mp/6591af748e3e/s…", :indices [161 184]}]}, :source "<a href=\"https://about.twitter.com/products/tweetdeck\" rel=\"nofollow\">TweetDeck</a>", :lang "nl", :in_reply_to_user_id_str nil, :full_text "Daar is 'ie dan! SPAMSPAMSPAM editie 2! Met een samenvatting van wat er in deze eerste twee maanden van 2020 gebeurd en gedaan is binnen @hack42. Lees het via: \nhttps://t.co/O1YzlWTFU3 #hackerspace #nieuws #arnhem #nuarnhem", :id 1233321189319291000, :contributors nil, :display_text_range [0 223], :truncated false, :retweeted false, :in_reply_to_user_id nil, :id_str "1233321189319290880", :favorited false, :user {:description "Hack42: hackerspace en computermuseum Arnhem. Een technische creatieve omgeving waar alles kan. Kom langs! 
Wat wordt jouw project?", :profile_link_color "250F7C", :profile_sidebar_border_color "FFFFFF", :is_translation_enabled true, :profile_image_url "http://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :profile_use_background_image true, :default_profile false, :profile_background_image_url "http://abs.twimg.com/images/themes/theme15/bg.png", :is_translator false, :profile_text_color "333333", :profile_banner_url "https://pbs.twimg.com/profile_banners/91565087/1497686456", :name "Hackerspace Arnhem", :profile_background_image_url_https "https://abs.twimg.com/images/themes/theme15/bg.png", :favourites_count 277, :screen_name "Hack42", :entities {:url {:urls [{:url "https://t.co/8YNbbxjeYQ", :expanded_url "http://hack42.nl/", :display_url "hack42.nl", :indices [0 23]}]}, :description {:urls []}}, :listed_count 78, :profile_image_url_https "https://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :statuses_count 11721, :has_extended_profile false, :contributors_enabled false, :following true, :lang nil, :utc_offset nil, :notifications false, :default_profile_image false, :profile_background_color "02152E", :id 91565087, :follow_request_sent false, :url "https://t.co/8YNbbxjeYQ", :translator_type "regular", :time_zone nil, :profile_sidebar_fill_color "C8C8E6", :protected false, :profile_background_tile true, :id_str "91565087", :geo_enabled false, :location "ARNHEM, NL", :followers_count 1666, :friends_count 61, :verified false, :created_at "Sat Nov 21 12:49:38 +0000 2009"}, :retweet_count 2, :favorite_count 5, :created_at "Fri Feb 28 09:21:00 +0000 2020"}
{:in_reply_to_screen_name nil,
:is_quote_status false,
:coordinates nil,
:in_reply_to_status_id_str nil,
:place nil,
:possibly_sensitive false,
:geo nil,
:in_reply_to_status_id nil,
:entities {:hashtags [{:text "hackerspace", :indices [185 197]} {:text "nieuws", :indices [198 205]} {:text "arnhem", :indices [206 213]} {:text "nuarnhem", :indices [214 223]}], :symbols [],
:user_mentions [{:screen_name "Hack42", :name "Hackerspace Arnhem", :id 91565087, :id_str "91565087", :indices [137 144]}],
:urls [{:url "https://t.co/O1YzlWTFU3",
:expanded_url "https://mailchi.mp/6591af748e3e/spamspamspam2",
:display_url "mailchi.mp/6591af748e3e/s…", :indices [161 184]}]},
:source "<a href=\"https://about.twitter.com/products/tweetdeck\" rel=\"nofollow\">TweetDeck</a>",
:lang "nl",
:in_reply_to_user_id_str nil,
:full_text "Daar is 'ie dan! SPAMSPAMSPAM editie 2! Met een samenvatting van wat er in deze eerste twee maanden van 2020 gebeurd en gedaan is binnen @hack42. Lees het via: \nhttps://t.co/O1YzlWTFU3 #hackerspace #nieuws #arnhem #nuarnhem",
:id 1233321189319291000,
:contributors nil,
:display_text_range [0 223],
:truncated false,
:retweeted false,
:in_reply_to_user_id nil,
:id_str "1233321189319290880",
:favorited false,
:user {:description "Hack42: hackerspace en computermuseum Arnhem. Een technische creatieve omgeving waar alles kan. Kom langs! Wat wordt jouw project?", :profile_link_color "250F7C", :profile_sidebar_border_color "FFFFFF", :is_translation_enabled true, :profile_image_url "http://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :profile_use_background_image true, :default_profile false, :profile_background_image_url "http://abs.twimg.com/images/themes/theme15/bg.png", :is_translator false, :profile_text_color "333333", :profile_banner_url "https://pbs.twimg.com/profile_banners/91565087/1497686456", :name "Hackerspace Arnhem", :profile_background_image_url_https "https://abs.twimg.com/images/themes/theme15/bg.png", :favourites_count 277, :screen_name "Hack42", :entities {:url {:urls [{:url "https://t.co/8YNbbxjeYQ", :expanded_url "http://hack42.nl/", :display_url "hack42.nl", :indices [0 23]}]}, :description {:urls []}},
:listed_count 78, :profile_image_url_https "https://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :statuses_count 11721, :has_extended_profile false, :contributors_enabled false, :following true, :lang nil, :utc_offset nil, :notifications false, :default_profile_image false, :profile_background_color "02152E", :id 91565087, :follow_request_sent false, :url "https://t.co/8YNbbxjeYQ", :translator_type "regular", :time_zone nil, :profile_sidebar_fill_color "C8C8E6", :protected false, :profile_background_tile true, :id_str "91565087", :geo_enabled false, :location "ARNHEM, NL", :followers_count 1666, :friends_count 61, :verified false, :created_at "Sat Nov 21 12:49:38 +0000 2009"},
:retweet_count 2, :favorite_count 5, :created_at "Fri Feb 28 09:21:00 +0000 2020"}