Merge pull request #34 from raboof/drop-media-link-from-twitter-text

Drop media link from twitter text
This commit is contained in:
Dmitri Sotnikov 2020-03-06 15:26:45 -05:00 committed by GitHub
commit 180f2a6eb5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 3 deletions

View file

@ -61,7 +61,7 @@
(reduced (str text "...")) (reduced (str text "..."))
(str text " " word))) (str text " " word)))
"" ""
(clojure.string/split text #" ")) (string/split text #" "))
:else text)) :else text))
@ -138,12 +138,20 @@
(when-not (:media-only? mastodon-config) (when-not (:media-only? mastodon-config)
(post-status text))))) (post-status text)))))
(defn in
  "Membership test: returns true when some element of `haystack` is equal
  to `needle`, and nil otherwise (including when `haystack` is empty or nil)."
  [needle haystack]
  (some #(= needle %) haystack))
(defn chop-tail-media-url
  "If `text` ends in a link to the media (which is uploaded anyway), chops
  that link off instead of including it in the toot.

  The final token of `text` is a single space followed by a run of
  non-whitespace characters reaching the end of the string. When that run
  equals the :url of any entry in `media`, the space and the run are removed;
  otherwise `text` is returned unchanged."
  [text media]
  (letfn [;; The replacement callback receives the match as a vector:
          ;; index 0 is the whole match (leading space included),
          ;; index 1 is the captured non-whitespace run.
          (drop-if-media [[whole tail]]
            (if (in tail (map :url media))
              ""
              whole))]
    (string/replace text #" (\S+)$" drop-if-media)))
(defn parse-tweet
  "Converts a tweet map into the post map used by the rest of the bot.

  Reads :created_at, :full_text, the :media list under :extended_entities
  and the :screen_name under :user.

  Returns a map with:
    :created-at  - a js/Date built from :created_at
    :text        - the tweet text with a trailing media link removed, the
                   screen name appended when `append-screen-name?` is truthy,
                   then passed through `trim-text`
    :media-links - the :media_url_https of every media entry of type \"photo\""
  [{created-at :created_at
    text :full_text
    {:keys [media]} :extended_entities
    {:keys [screen_name]} :user :as tweet}]
  {:created-at (js/Date. created-at)
   :text (trim-text
          (str (chop-tail-media-url text media)
               ;; The suffix must be built with `str`: a string literal in
               ;; call position is not a function and throws at runtime.
               (if append-screen-name? (str "\n - " screen_name) "")))
   :media-links (keep #(when (= (:type %) "photo") (:media_url_https %)) media)})
(defmulti parse-tumblr-post :type) (defmulti parse-tumblr-post :type)
@ -223,4 +231,4 @@
(when-let [feeds (some-> config :rss)] (when-let [feeds (some-> config :rss)]
(let [parser (rss.)] (let [parser (rss.)]
(doseq [feed feeds] (doseq [feed feeds]
(parse-feed last-post-time parser feed)))))))) (parse-feed last-post-time parser feed))))))))

View file

@ -3,10 +3,27 @@
(ns mastodon-bot.core_test (ns mastodon-bot.core_test
(:require (:require
[cljs.test :refer-macros [deftest is testing run-tests]] [cljs.test :refer-macros [deftest is testing run-tests]]
[cljs.reader :as edn]
["fs" :as fs]
[mastodon-bot.core :as core] [mastodon-bot.core :as core]
)) ))
(deftest test-read-config (deftest test-read-config
(is (= 300 core/max-post-length))) (is (= 300 core/max-post-length)))
(defn readfile
  "Synchronously reads the file at `filename` as UTF-8 text and parses its
  contents as EDN, returning the resulting value."
  [filename]
  (edn/read-string
   (fs/readFileSync filename #js {:encoding "UTF-8"})))
(deftest test-remove-link-to-image
  ;; The fixture's :full_text ends with the same t.co URL that its media
  ;; entries carry as :url, so the parsed text must not contain that link.
  (let [expected "Mensen vragen om meer foto's in SPAMSPAMSPAM, dus bij deze achteraf de nieuwe kasten voor de projectenkast en de bookcrossingzone. Te vinden direct bij binnenkomst op de eerste en tweede verdieping."
        tweet (readfile "testdata/twitter/tweet-with-link-to-image.edn")]
    (is (= expected (:text (core/parse-tweet tweet))))))
(deftest test-parse-normal-tweet-text
  ;; The fixture has no :extended_entities, and the expected text equals its
  ;; :full_text, including the link in the middle of the message.
  (let [expected "Daar is 'ie dan! SPAMSPAMSPAM editie 2! Met een samenvatting van wat er in deze eerste twee maanden van 2020 gebeurd en gedaan is binnen @hack42. Lees het via: \nhttps://t.co/O1YzlWTFU3 #hackerspace #nieuws #arnhem #nuarnhem"
        tweet (readfile "testdata/twitter/normal-tweet.edn")]
    (is (= expected (:text (core/parse-tweet tweet))))))
(cljs.test/run-tests) (cljs.test/run-tests)

2
testdata/twitter/normal-tweet.edn vendored Normal file
View file

@ -0,0 +1,2 @@
{:in_reply_to_screen_name nil, :is_quote_status false, :coordinates nil, :in_reply_to_status_id_str nil, :place nil, :possibly_sensitive false, :geo nil, :in_reply_to_status_id nil, :entities {:hashtags [{:text "hackerspace", :indices [185 197]} {:text "nieuws", :indices [198 205]} {:text "arnhem", :indices [206 213]} {:text "nuarnhem", :indices [214 223]}], :symbols [], :user_mentions [{:screen_name "Hack42", :name "Hackerspace Arnhem", :id 91565087, :id_str "91565087", :indices [137 144]}], :urls [{:url "https://t.co/O1YzlWTFU3", :expanded_url "https://mailchi.mp/6591af748e3e/spamspamspam2", :display_url "mailchi.mp/6591af748e3e/s…", :indices [161 184]}]}, :source "<a href=\"https://about.twitter.com/products/tweetdeck\" rel=\"nofollow\">TweetDeck</a>", :lang "nl", :in_reply_to_user_id_str nil, :full_text "Daar is 'ie dan! SPAMSPAMSPAM editie 2! Met een samenvatting van wat er in deze eerste twee maanden van 2020 gebeurd en gedaan is binnen @hack42. Lees het via: \nhttps://t.co/O1YzlWTFU3 #hackerspace #nieuws #arnhem #nuarnhem", :id 1233321189319291000, :contributors nil, :display_text_range [0 223], :truncated false, :retweeted false, :in_reply_to_user_id nil, :id_str "1233321189319290880", :favorited false, :user {:description "Hack42: hackerspace en computermuseum Arnhem. Een technische creatieve omgeving waar alles kan. Kom langs! 
Wat wordt jouw project?", :profile_link_color "250F7C", :profile_sidebar_border_color "FFFFFF", :is_translation_enabled true, :profile_image_url "http://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :profile_use_background_image true, :default_profile false, :profile_background_image_url "http://abs.twimg.com/images/themes/theme15/bg.png", :is_translator false, :profile_text_color "333333", :profile_banner_url "https://pbs.twimg.com/profile_banners/91565087/1497686456", :name "Hackerspace Arnhem", :profile_background_image_url_https "https://abs.twimg.com/images/themes/theme15/bg.png", :favourites_count 277, :screen_name "Hack42", :entities {:url {:urls [{:url "https://t.co/8YNbbxjeYQ", :expanded_url "http://hack42.nl/", :display_url "hack42.nl", :indices [0 23]}]}, :description {:urls []}}, :listed_count 78, :profile_image_url_https "https://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :statuses_count 11721, :has_extended_profile false, :contributors_enabled false, :following true, :lang nil, :utc_offset nil, :notifications false, :default_profile_image false, :profile_background_color "02152E", :id 91565087, :follow_request_sent false, :url "https://t.co/8YNbbxjeYQ", :translator_type "regular", :time_zone nil, :profile_sidebar_fill_color "C8C8E6", :protected false, :profile_background_tile true, :id_str "91565087", :geo_enabled false, :location "ARNHEM, NL", :followers_count 1666, :friends_count 61, :verified false, :created_at "Sat Nov 21 12:49:38 +0000 2009"}, :retweet_count 2, :favorite_count 5, :created_at "Fri Feb 28 09:21:00 +0000 2020"}

View file

@ -0,0 +1 @@
{:in_reply_to_screen_name nil, :is_quote_status false, :coordinates nil, :in_reply_to_status_id_str nil, :place nil, :possibly_sensitive false, :geo nil, :in_reply_to_status_id nil, :entities {:hashtags [], :symbols [], :user_mentions [], :urls [], :media [{:sizes {:thumb {:w 150, :h 150, :resize "crop"}, :large {:w 1536, :h 2048, :resize "fit"}, :medium {:w 900, :h 1200, :resize "fit"}, :small {:w 510, :h 680, :resize "fit"}}, :media_url_https "https://pbs.twimg.com/media/ER3qUNxXUAE-UUN.jpg", :type "photo", :media_url "http://pbs.twimg.com/media/ER3qUNxXUAE-UUN.jpg", :id 1233399505984376800, :expanded_url "https://twitter.com/Hack42/status/1233399612549017601/photo/1", :url "https://t.co/txtygM76rw", :display_url "pic.twitter.com/txtygM76rw", :indices [199 222], :id_str "1233399505984376833"}]}, :source "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>", :lang "nl", :in_reply_to_user_id_str nil, :full_text "Mensen vragen om meer foto's in SPAMSPAMSPAM, dus bij deze achteraf de nieuwe kasten voor de projectenkast en de bookcrossingzone. Te vinden direct bij binnenkomst op de eerste en tweede verdieping. 
https://t.co/txtygM76rw", :id 1233399612549017600, :contributors nil, :extended_entities {:media [{:sizes {:thumb {:w 150, :h 150, :resize "crop"}, :large {:w 1536, :h 2048, :resize "fit"}, :medium {:w 900, :h 1200, :resize "fit"}, :small {:w 510, :h 680, :resize "fit"}}, :media_url_https "https://pbs.twimg.com/media/ER3qUNxXUAE-UUN.jpg", :type "photo", :media_url "http://pbs.twimg.com/media/ER3qUNxXUAE-UUN.jpg", :id 1233399505984376800, :expanded_url "https://twitter.com/Hack42/status/1233399612549017601/photo/1", :url "https://t.co/txtygM76rw", :display_url "pic.twitter.com/txtygM76rw", :indices [199 222], :id_str "1233399505984376833"} {:sizes {:thumb {:w 150, :h 150, :resize "crop"}, :small {:w 510, :h 680, :resize "fit"}, :large {:w 1536, :h 2048, :resize "fit"}, :medium {:w 900, :h 1200, :resize "fit"}}, :media_url_https "https://pbs.twimg.com/media/ER3qX2RW4AEhQfW.jpg", :type "photo", :media_url "http://pbs.twimg.com/media/ER3qX2RW4AEhQfW.jpg", :id 1233399568395591700, :expanded_url "https://twitter.com/Hack42/status/1233399612549017601/photo/1", :url "https://t.co/txtygM76rw", :display_url "pic.twitter.com/txtygM76rw", :indices [199 222], :id_str "1233399568395591681"}]}, :display_text_range [0 198], :truncated false, :retweeted false, :in_reply_to_user_id nil, :id_str "1233399612549017601", :favorited false, :user {:description "Hack42: hackerspace en computermuseum Arnhem. Een technische creatieve omgeving waar alles kan. Kom langs! 
Wat wordt jouw project?", :profile_link_color "250F7C", :profile_sidebar_border_color "FFFFFF", :is_translation_enabled true, :profile_image_url "http://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :profile_use_background_image true, :default_profile false, :profile_background_image_url "http://abs.twimg.com/images/themes/theme15/bg.png", :is_translator false, :profile_text_color "333333", :profile_banner_url "https://pbs.twimg.com/profile_banners/91565087/1497686456", :name "Hackerspace Arnhem", :profile_background_image_url_https "https://abs.twimg.com/images/themes/theme15/bg.png", :favourites_count 277, :screen_name "Hack42", :entities {:url {:urls [{:url "https://t.co/8YNbbxjeYQ", :expanded_url "http://hack42.nl/", :display_url "hack42.nl", :indices [0 23]}]}, :description {:urls []}}, :listed_count 78, :profile_image_url_https "https://pbs.twimg.com/profile_images/1101094129419849728/vypXoIBq_normal.jpg", :statuses_count 11721, :has_extended_profile false, :contributors_enabled false, :following true, :lang nil, :utc_offset nil, :notifications false, :default_profile_image false, :profile_background_color "02152E", :id 91565087, :follow_request_sent false, :url "https://t.co/8YNbbxjeYQ", :translator_type "regular", :time_zone nil, :profile_sidebar_fill_color "C8C8E6", :protected false, :profile_background_tile true, :id_str "91565087", :geo_enabled false, :location "ARNHEM, NL", :followers_count 1666, :friends_count 61, :verified false, :created_at "Sat Nov 21 12:49:38 +0000 2009"}, :retweet_count 2, :favorite_count 2, :created_at "Fri Feb 28 14:32:37 +0000 2020"}