# frozen_string_literal: true

require 'rails_helper'

# Exercises FetchLinkCardService against stubbed HTTP endpoints.
#
# The shared `before` hook registers every request stub, optionally primes the
# oEmbed endpoint cache, and then calls the service with the example's
# `status`. Each example therefore only asserts on which requests were made
# and on the preview card attached to the status afterwards.
RSpec.describe FetchLinkCardService do
  subject { described_class.new }

  # Body served by the /html and /not-found stubs; contexts override it as needed.
  let(:html) { '<!doctype html><title>Hello world</title>' }
  # When non-nil, written to Rails.cache under 'oembed_endpoint:example.com' before the service runs.
  let(:oembed_cache) { nil }

  before do
    stub_request(:get, 'http://example.com/html').to_return(headers: { 'Content-Type' => 'text/html' }, body: html)
    stub_request(:get, 'http://example.com/not-found').to_return(status: 404, headers: { 'Content-Type' => 'text/html' }, body: html)
    # NOTE(review): this stub answers 404, so the plain-text example below may pass
    # because of the status code rather than the content type -- confirm intent.
    stub_request(:get, 'http://example.com/text').to_return(status: 404, headers: { 'Content-Type' => 'text/plain' }, body: 'Hello')
    stub_request(:get, 'http://example.com/redirect').to_return(status: 302, headers: { 'Location' => 'http://example.com/html' })
    stub_request(:get, 'http://example.com/redirect-to-404').to_return(status: 302, headers: { 'Location' => 'http://example.com/not-found' })
    stub_request(:get, 'http://example.com/oembed?url=http://example.com/html').to_return(headers: { 'Content-Type' => 'application/json' }, body: '{ "version": "1.0", "type": "link", "title": "oEmbed title" }')
    stub_request(:get, 'http://example.com/oembed?format=json&url=http://example.com/html').to_return(headers: { 'Content-Type' => 'application/json' }, body: '{ "version": "1.0", "type": "link", "title": "oEmbed title" }')
    stub_request(:get, 'http://example.xn--fiqs8s')
    stub_request(:get, 'http://example.com/日本語')
    stub_request(:get, 'http://example.com/test?data=file.gpx%5E1')
    stub_request(:get, 'http://example.com/test-')
    stub_request(:get, 'http://example.com/sjis').to_return(request_fixture('sjis.txt'))
    stub_request(:get, 'http://example.com/sjis_with_wrong_charset').to_return(request_fixture('sjis_with_wrong_charset.txt'))
    stub_request(:get, 'http://example.com/koi8-r').to_return(request_fixture('koi8-r.txt'))
    stub_request(:get, 'http://example.com/windows-1251').to_return(request_fixture('windows-1251.txt'))
    stub_request(:get, 'http://example.com/low_confidence_latin1').to_return(request_fixture('low_confidence_latin1.txt'))
    stub_request(:get, 'http://example.com/latin1_posing_as_utf8_broken').to_return(request_fixture('latin1_posing_as_utf8_broken.txt'))
    stub_request(:get, 'http://example.com/latin1_posing_as_utf8_recoverable').to_return(request_fixture('latin1_posing_as_utf8_recoverable.txt'))
    stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt'))
    stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt'))
    stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt'))

    Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache

    # Run the service here so every example observes its effects.
    subject.call(status)
  end

  context 'with a local status' do
    context 'with URL of a regular HTML page' do
      let(:status) { Fabricate(:status, text: 'http://example.com/html') }

      it 'creates preview card' do
        expect(status.preview_card).to_not be_nil
        expect(status.preview_card.url).to eq 'http://example.com/html'
        expect(status.preview_card.title).to eq 'Hello world'
      end
    end

    context 'with URL of a page with no title' do
      let(:status) { Fabricate(:status, text: 'http://example.com/html') }
      let(:html) { '<!doctype html><title></title>' }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a URL of a plain-text page' do
      let(:status) { Fabricate(:status, text: 'http://example.com/text') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with multiple URLs' do
      let(:status) { Fabricate(:status, text: 'ftp://example.com http://example.com/html http://example.com/text') }

      it 'fetches the first valid URL' do
        expect(a_request(:get, 'http://example.com/html')).to have_been_made
      end

      it 'does not fetch the second valid URL' do
        # Must match the URL in the status text exactly (no trailing slash);
        # otherwise this negative assertion could never fail.
        expect(a_request(:get, 'http://example.com/text')).to_not have_been_made
      end
    end

    context 'with a redirect URL' do
      let(:status) { Fabricate(:status, text: 'http://example.com/redirect') }

      it 'follows redirect' do
        expect(a_request(:get, 'http://example.com/redirect')).to have_been_made.once
        expect(a_request(:get, 'http://example.com/html')).to have_been_made.once
      end

      it 'creates preview card' do
        expect(status.preview_card).to_not be_nil
        expect(status.preview_card.url).to eq 'http://example.com/html'
        expect(status.preview_card.title).to eq 'Hello world'
      end
    end

    context 'with a broken redirect URL' do
      let(:status) { Fabricate(:status, text: 'http://example.com/redirect-to-404') }

      it 'follows redirect' do
        expect(a_request(:get, 'http://example.com/redirect-to-404')).to have_been_made.once
        expect(a_request(:get, 'http://example.com/not-found')).to have_been_made.once
      end

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a redirect URL with faulty encoding' do
      let(:status) { Fabricate(:status, text: 'http://example.com/aergerliche-umlaute') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a page that has no title' do
      let(:status) { Fabricate(:status, text: 'http://example.com/page_without_title') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with a 404 URL' do
      let(:status) { Fabricate(:status, text: 'http://example.com/not-found') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end

    context 'with an IDN URL' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.中国') }

      it 'fetches the URL' do
        expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.once
      end
    end

    context 'with a URL of a page in Shift JIS encoding' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/sjis') }

      it 'decodes the HTML' do
        expect(status.preview_card.title).to eq('SJISのページ')
      end
    end

    context 'with a URL of a page in Shift JIS encoding labeled as UTF-8' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/sjis_with_wrong_charset') }

      it 'decodes the HTML despite the wrong charset header' do
        expect(status.preview_card.title).to eq('SJISのページ')
      end
    end

    context 'with a URL of a page in KOI8-R encoding' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/koi8-r') }

      it 'decodes the HTML' do
        expect(status.preview_card.title).to eq('Московя начинаетъ только въ XVI ст. привлекать внимане иностранцевъ.')
      end
    end

    context 'with a URL of a page in Windows-1251 encoding' do
      let(:status) { Fabricate(:status, text: 'Check out http://example.com/windows-1251') }

      it 'decodes the HTML' do
        expect(status.preview_card.title).to eq('сэмпл текст')
      end
    end

    context 'with a URL of a page in ISO-8859-1 encoding, that charlock_holmes cannot detect' do
      context 'when encoding in http header is correct' do
        let(:status) { Fabricate(:status, text: 'Check out http://example.com/low_confidence_latin1') }

        it 'decodes the HTML' do
          expect(status.preview_card.title).to eq("Tofu á l'orange")
        end
      end

      context 'when encoding in http header is incorrect' do
        context 'when encoding problems appear in unrelated tags' do
          let(:status) { Fabricate(:status, text: 'Check out http://example.com/latin1_posing_as_utf8_recoverable') }

          it 'decodes the HTML' do
            expect(status.preview_card.title).to eq('Tofu with orange sauce')
          end
        end

        context 'when encoding problems appear in title tag' do
          let(:status) { Fabricate(:status, text: 'Check out http://example.com/latin1_posing_as_utf8_broken') }

          it 'does not create a preview card' do
            expect(status.preview_card).to be_nil
          end
        end
      end
    end

    context 'with a Japanese path URL' do
      let(:status) { Fabricate(:status, text: 'テストhttp://example.com/日本語') }

      it 'fetches the URL' do
        expect(a_request(:get, 'http://example.com/日本語')).to have_been_made.once
      end
    end

    context 'with a hyphen-suffixed URL' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/test-') }

      it 'fetches the URL' do
        expect(a_request(:get, 'http://example.com/test-')).to have_been_made.once
      end
    end

    context 'with a caret-suffixed URL' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/test?data=file.gpx^1') }

      it 'fetches the URL' do
        expect(a_request(:get, 'http://example.com/test?data=file.gpx%5E1')).to have_been_made.once
      end

      it 'does not strip the caret before fetching' do
        expect(a_request(:get, 'http://example.com/test?data=file.gpx')).to_not have_been_made
      end
    end

    context 'with a non-isolated URL' do
      let(:status) { Fabricate(:status, text: 'testhttp://example.com/sjis') }

      it 'does not fetch URLs not isolated from their surroundings' do
        expect(a_request(:get, 'http://example.com/sjis')).to_not have_been_made
      end
    end

    context 'with a URL of a page with oEmbed support' do
      let(:html) { '<!doctype html><title>Hello world</title><link rel="alternate" type="application/json+oembed" href="http://example.com/oembed?url=http://example.com/html">' }
      let(:status) { Fabricate(:status, text: 'http://example.com/html') }

      it 'fetches the oEmbed URL' do
        expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to have_been_made.once
      end

      it 'creates preview card' do
        expect(status.preview_card).to_not be_nil
        expect(status.preview_card.url).to eq 'http://example.com/html'
        expect(status.preview_card.title).to eq 'oEmbed title'
      end

      context 'when oEmbed endpoint cache populated' do
        # The cached endpoint differs from the one advertised in the page's <link>
        # tag (extra format=json), so the examples can tell which one was used.
        let(:oembed_cache) { { endpoint: 'http://example.com/oembed?format=json&url={url}', format: :json } }

        it 'uses the cached oEmbed response' do
          expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to_not have_been_made
          expect(a_request(:get, 'http://example.com/oembed?format=json&url=http://example.com/html')).to have_been_made
        end

        it 'creates preview card' do
          expect(status.preview_card).to_not be_nil
          expect(status.preview_card.url).to eq 'http://example.com/html'
          expect(status.preview_card.title).to eq 'oEmbed title'
        end
      end

      # If the original HTML URL for whatever reason (e.g. DOS protection) redirects to
      # an error page, we can still use the cached oEmbed but should not use the
      # redirect URL on the card.
      context 'when oEmbed endpoint cache populated but page returns 404' do
        let(:status) { Fabricate(:status, text: 'http://example.com/redirect-to-404') }
        let(:oembed_cache) { { endpoint: 'http://example.com/oembed?url=http://example.com/html', format: :json } }

        it 'uses the cached oEmbed response' do
          expect(a_request(:get, 'http://example.com/oembed?url=http://example.com/html')).to have_been_made
        end

        it 'creates preview card' do
          expect(status.preview_card).to_not be_nil
          expect(status.preview_card.title).to eq 'oEmbed title'
        end

        it 'uses the original URL' do
          expect(status.preview_card&.url).to eq 'http://example.com/redirect-to-404'
        end
      end
    end

    context 'with a URL of a page that includes a canonical URL too long for PostgreSQL unique indexes' do
      let(:status) { Fabricate(:status, text: 'test http://example.com/long_canonical_url') }

      it 'does not create a preview card' do
        expect(status.preview_card).to be_nil
      end
    end
  end

  context 'with a remote status' do
    # The status text is HTML markup containing a hashtag link followed by two
    # ordinary links; the anchors must be well-formed for the examples below
    # to exercise link extraction.
    let(:status) do
      Fabricate(:status, account: Fabricate(:account, domain: 'example.com'), text: <<-TEXT)
      Habt ihr ein paar gute Links zu <a>foo</a>
      #<span class="tag"><a href="https://quitter.se/tag/wannacry" target="_blank" rel="tag noopener noreferrer" title="https://quitter.se/tag/wannacry">Wannacry</a></span> herumfliegen?
      Ich will mal unter <br> <a href="http://example.com/not-found" target="_blank" rel="noopener noreferrer" title="http://example.com/not-found">http://example.com/not-found</a> was sammeln. !
      <a href="http://sn.jonkman.ca/group/416/id" target="_blank" rel="noopener noreferrer" title="http://sn.jonkman.ca/group/416/id">security</a>
      TEXT
    end

    it 'parses out URLs' do
      expect(a_request(:get, 'http://example.com/not-found')).to have_been_made.once
    end

    it 'ignores URLs to hashtags' do
      expect(a_request(:get, 'https://quitter.se/tag/wannacry')).to_not have_been_made
    end
  end
end