[4chan] Add Support for 4chan (#142)

This commit adds support for 4chan, which means that 4chan can now be
selected as a new source type. When the 4chan source is selected, the user
can choose the board whose RSS feed they want to follow.
This commit is contained in:
Rico Berger
2024-02-14 13:05:10 +01:00
committed by GitHub
parent 0b077ae973
commit 689d3bd39b
17 changed files with 957 additions and 2 deletions

View File

@@ -18,6 +18,7 @@ import { getGithubFeed } from './github.ts';
import { IProfile } from '../models/profile.ts';
import { getNitterFeed } from './nitter.ts';
import { getMastodonFeed } from './mastodon.ts';
import { getFourChanFeed } from './fourchan.ts';
// import { getXFeed } from './x.ts';
/**
@@ -33,6 +34,14 @@ export const getFeed = async (
feedData: string | undefined,
): Promise<{ source: ISource; items: IItem[] }> => {
switch (source.type) {
case 'fourchan':
return await getFourChanFeed(
supabaseClient,
redisClient,
profile,
source,
feedData,
);
case 'github':
return await getGithubFeed(
supabaseClient,

View File

@@ -0,0 +1,203 @@
import { SupabaseClient } from '@supabase/supabase-js';
import { FeedEntry } from 'rss/types';
import { Redis } from 'redis';
import { unescape } from 'lodash';
import { IItem } from '../models/item.ts';
import { ISource } from '../models/source.ts';
import { feedutils } from './utils/index.ts';
import { IProfile } from '../models/profile.ts';
import { utils } from '../utils/index.ts';
/**
 * `getFourChanFeed` loads the RSS feed of a 4chan board and turns it into a
 * source together with the list of items found in that feed.
 *
 * The board is read from `source.options.fourchan`. The provided `source`
 * object is updated in place (id, type, title and link) and is also returned.
 * The Supabase client, the Redis client and the profile are accepted as
 * parameters but are not used by this function.
 *
 * @param _supabaseClient Unused.
 * @param _redisClient Unused.
 * @param _profile Unused.
 * @param source The source for which the feed should be retrieved.
 * @param feedData Forwarded unchanged to `feedutils.getAndParseFeed`.
 * @returns The updated source and the items created from the feed entries.
 * @throws `feedutils.FeedValidationError` when the source has no `fourchan`
 *   option and `Error` when the parsed feed has no title.
 */
export const getFourChanFeed = async (
  _supabaseClient: SupabaseClient,
  _redisClient: Redis | undefined,
  _profile: IProfile,
  source: ISource,
  feedData: string | undefined,
): Promise<{ source: ISource; items: IItem[] }> => {
  /**
   * Without a `fourchan` option (the board) we can not build the url of the
   * RSS feed, so the source is rejected.
   */
  if (!source.options?.fourchan) {
    throw new feedutils.FeedValidationError('Invalid source options');
  }

  const feedUrl =
    `https://boards.4chan.org/${source.options.fourchan}/index.rss`;
  const feed = await feedutils.getAndParseFeed(feedUrl, source, feedData);

  /**
   * A feed without a title is considered invalid.
   */
  if (!feed.title.value) {
    throw new Error('Invalid feed');
  }

  /**
   * A source which does not have an id yet gets one generated from the user
   * id, the column id and the board. Afterwards the type of the source is set
   * to `fourchan` and the title of the feed is used as title of the source.
   */
  if (source.id === '') {
    source.id = await generateSourceId(
      source.userId,
      source.columnId,
      source.options.fourchan,
    );
  }
  source.type = 'fourchan';
  source.title = feed.title.value;

  /**
   * The first link of the feed (if there is one) becomes the link of the
   * source.
   */
  if (feed.links.length > 0) {
    source.link = feed.links[0];
  }

  /**
   * The source is complete now, so that we can walk through the entries of the
   * feed and create an item for each entry which should not be skipped.
   */
  const items: IItem[] = [];
  let position = 0;
  for (const entry of feed.entries) {
    const index = position;
    position += 1;

    if (skipEntry(index, entry, source.updatedAt || 0)) {
      continue;
    }

    /**
     * The unique id of an item is derived from the id of the source and the
     * id of the entry. When the entry has no id, the href of its first link
     * is used instead.
     */
    const identifier = entry.id ? entry.id : entry.links[0].href!;
    const itemId = await generateItemId(source.id, identifier);

    const item: IItem = {
      id: itemId,
      userId: source.userId,
      columnId: source.columnId,
      sourceId: source.id,
      title: entry.title!.value!,
      link: entry.links[0].href!,
      media: getMedia(entry),
      description: getItemDescription(entry),
      author: entry.author?.name,
      publishedAt: Math.floor(entry.published!.getTime() / 1000),
    };
    items.push(item);
  }

  return { source, items };
};
/**
 * `skipEntry` is used to determine if an entry should be skipped or not. When
 * an entry in the RSS feed is skipped it will not be added to the database. An
 * entry will be skipped when
 * - it is not within the first 50 entries of the feed (`index` is zero-based,
 *   so every index of 50 and above is skipped), because we only keep the last
 *   50 items of each source in our delete logic.
 * - the entry does not contain a title, a link or a published date.
 * - the published date of the entry is not newer than the last update date of
 *   the source minus 10 seconds.
 *
 * @param index Zero-based position of the entry within the feed.
 * @param entry The feed entry to check.
 * @param sourceUpdatedAt Last update of the source in seconds.
 * @returns `true` when the entry must be skipped, `false` otherwise.
 */
const skipEntry = (
  index: number,
  entry: FeedEntry,
  sourceUpdatedAt: number,
): boolean => {
  /**
   * Use `>=` instead of `===`, otherwise only the 51st entry would be skipped
   * and all following entries would be processed again.
   */
  if (index >= 50) {
    return true;
  }

  if (
    !entry.title?.value ||
    entry.links.length === 0 ||
    !entry.links[0].href ||
    !entry.published
  ) {
    return true;
  }

  /**
   * The published date is compared in seconds, with a tolerance of 10 seconds
   * before the last update of the source.
   */
  const publishedAt = Math.floor(entry.published.getTime() / 1000);
  return publishedAt <= sourceUpdatedAt - 10;
};
/**
 * `generateSourceId` builds the id of a source. The id starts with the
 * `fourchan` prefix, followed by the user id, the column id and the MD5 hash of
 * the provided `link`, all joined by `-`. Within this file the `link` argument
 * is the value of the `fourchan` option of the source.
 */
const generateSourceId = async (
  userId: string,
  columnId: string,
  link: string,
): Promise<string> => {
  const hashedLink = await utils.md5(link);
  return `fourchan-${userId}-${columnId}-${hashedLink}`;
};
/**
 * `generateItemId` builds the id of an item by appending the MD5 hash of the
 * provided `identifier` to the id of the source. The identifier is either the
 * id of the feed entry or the link of the entry.
 */
const generateItemId = async (
  sourceId: string,
  identifier: string,
): Promise<string> => {
  const hashedIdentifier = await utils.md5(identifier);
  return `${sourceId}-${hashedIdentifier}`;
};
/**
 * `getItemDescription` returns the description which should be stored for an
 * item. When the feed entry has a non-empty description it is passed through
 * `unescape` and returned, otherwise `undefined` is returned.
 */
const getItemDescription = (entry: FeedEntry): string | undefined => {
  const rawDescription = entry.description?.value;
  return rawDescription ? unescape(rawDescription) : undefined;
};
/**
 * `getMedia` looks for an image in the description of the provided feed
 * `entry` and returns its url. The url of the first `<img>` tag found in the
 * unescaped description is only returned when it starts with `https://` or
 * `http://` and does not end with `.svg`. In all other cases `undefined` is
 * returned.
 */
const getMedia = (entry: FeedEntry): string | undefined => {
  const description = entry.description?.value;
  if (!description) {
    return undefined;
  }

  const found = /<img[^>]+\bsrc=["']([^"']+)["']/.exec(unescape(description));
  if (!found || found.length !== 2) {
    return undefined;
  }

  const imageUrl = found[1];
  const isAbsolute = imageUrl.startsWith('https://') ||
    imageUrl.startsWith('http://');
  if (!isAbsolute || imageUrl.endsWith('.svg')) {
    return undefined;
  }

  return imageUrl;
};

View File

@@ -0,0 +1,134 @@
import { createClient } from '@supabase/supabase-js';
import {
assertSpyCall,
assertSpyCalls,
returnsNext,
stub,
} from 'std/testing/mock';
import { ISource } from '../models/source.ts';
import { IProfile } from '../models/profile.ts';
import { getFourChanFeed } from './fourchan.ts';
import { utils } from '../utils/index.ts';
import { feedutils } from './utils/index.ts';
// Supabase client which is only needed to satisfy the signature of
// `getFourChanFeed`; it is created with a local url and a dummy key.
const supabaseClient = createClient('http://localhost:54321', 'test123');
// Minimal profile which is passed to `getFourChanFeed` in the test.
const mockProfile: IProfile = {
id: '',
tier: 'free',
createdAt: 0,
updatedAt: 0,
};
// Base source for the test. The id is empty and the type is not `fourchan`;
// the test expects `getFourChanFeed` to return a source with a generated id
// and the type `fourchan`. The `fourchan` option is added in the test itself.
const mockSource: ISource = {
id: '',
columnId: 'mycolumn',
userId: 'myuser',
type: 'medium',
title: '',
};
// RSS document with two items which is returned by the stubbed
// `utils.fetchWithTimeout` function in the test. It is a sample of the feed of
// the /v/ board.
const response = `<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<title>/v/ - Video Games</title>
<link>http://boards.4chan.org/v/./</link>
<description>Threads on /v/ - Video Games at 4chan.org.</description>
<atom:link href="http://boards.4chan.org/v/index.rss" rel="self" type="application/rss+xml" />
<item>
<title>Will the Cyberpunk sequel manage to get to the level of hype...</title>
<link>http://boards.4chan.org/v/thread/666978687#666978687</link>
<guid>http://boards.4chan.org/v/thread/666978687</guid>
<comments>http://boards.4chan.org/v/thread/666978687</comments>
<pubDate>Tue, 13 Feb 2024 15:59:04 EST</pubDate>
<dc:creator>Anonymous</dc:creator>
<description><![CDATA[<a href='http://i.4cdn.org/v/1707857944691136.png' target=_blank><img style='float:left;margin:8px' border=0 src='http://i.4cdn.org/v/1707857944691136s.jpg'></a> Will the Cyberpunk sequel manage to get to the level of hype that 2077 did? 2077 is basically a masterpiece now, but audiences won&#039;t forget the state it launched in. That&#039;s definitely going to affect the sequel.]]></description>
</item>
<item>
<title>new games can't have this feel</title>
<link>http://boards.4chan.org/v/thread/666978663#666978663</link>
<guid>http://boards.4chan.org/v/thread/666978663</guid>
<comments>http://boards.4chan.org/v/thread/666978663</comments>
<pubDate>Tue, 13 Feb 2024 15:58:46 EST</pubDate>
<dc:creator>Anonymous</dc:creator>
<description><![CDATA[<a href='http://i.4cdn.org/v/1707857926060804.jpg' target=_blank><img style='float:left;margin:8px' border=0 src='http://i.4cdn.org/v/1707857926060804s.jpg'></a> new games can&#039;t have this feel]]></description>
</item>
</channel>
</rss>`;
// Verifies that `getFourChanFeed` requests the RSS feed of the selected board
// and converts the response into the expected source and items.
Deno.test('getFourChanFeed', async () => {
// Replace `utils.fetchWithTimeout`, so that no real request is made. The first
// (and only expected) call resolves with the sample RSS document.
const fetchWithTimeoutSpy = stub(
utils,
'fetchWithTimeout',
returnsNext([
new Promise((resolve) => {
resolve(new Response(response, { status: 200 }));
}),
]),
);
try {
// Use the /v/ board as `fourchan` option of the source.
const { source, items } = await getFourChanFeed(
supabaseClient,
undefined,
mockProfile,
{ ...mockSource, options: { fourchan: 'v' } },
undefined,
);
// The returned source must have a generated id, the type `fourchan` and the
// title and link of the feed.
feedutils.assertEqualsSource(source, {
'id': 'fourchan-myuser-mycolumn-9e3669d19b675bd57058fd4664205d2a',
'columnId': 'mycolumn',
'userId': 'myuser',
'type': 'fourchan',
'title': '/v/ - Video Games',
'options': {
'fourchan': 'v',
},
'link': 'http://boards.4chan.org/v/./',
});
// Each item of the feed must be returned once. The media is the image url
// from the description and the `&#039;` entities are expected to be kept as
// they are in the description.
feedutils.assertEqualsItems(items, [
{
'id':
'fourchan-myuser-mycolumn-9e3669d19b675bd57058fd4664205d2a-4cedc3982b91056cf239c4a546aceca7',
'userId': 'myuser',
'columnId': 'mycolumn',
'sourceId': 'fourchan-myuser-mycolumn-9e3669d19b675bd57058fd4664205d2a',
'title':
'Will the Cyberpunk sequel manage to get to the level of hype...',
'link': 'http://boards.4chan.org/v/thread/666978687#666978687',
'media': 'http://i.4cdn.org/v/1707857944691136s.jpg',
'description':
"<a href='http://i.4cdn.org/v/1707857944691136.png' target=_blank><img style='float:left;margin:8px' border=0 src='http://i.4cdn.org/v/1707857944691136s.jpg'></a> Will the Cyberpunk sequel manage to get to the level of hype that 2077 did? 2077 is basically a masterpiece now, but audiences won&#039;t forget the state it launched in. That&#039;s definitely going to affect the sequel.",
'author': 'Anonymous',
'publishedAt': 1707857944,
},
{
'id':
'fourchan-myuser-mycolumn-9e3669d19b675bd57058fd4664205d2a-2d682afe971ddf86fb31f588bbc9b808',
'userId': 'myuser',
'columnId': 'mycolumn',
'sourceId': 'fourchan-myuser-mycolumn-9e3669d19b675bd57058fd4664205d2a',
'title': "new games can't have this feel",
'link': 'http://boards.4chan.org/v/thread/666978663#666978663',
'media': 'http://i.4cdn.org/v/1707857926060804s.jpg',
'description':
"<a href='http://i.4cdn.org/v/1707857926060804.jpg' target=_blank><img style='float:left;margin:8px' border=0 src='http://i.4cdn.org/v/1707857926060804s.jpg'></a> new games can&#039;t have this feel",
'author': 'Anonymous',
'publishedAt': 1707857926,
},
]);
} finally {
// Always restore the original function, also when an assertion failed.
fetchWithTimeoutSpy.restore();
}
// The feed must have been requested exactly once via the url of the board.
// NOTE(review): the third argument (5000) is presumably the timeout in
// milliseconds - confirm against `utils.fetchWithTimeout`.
assertSpyCall(fetchWithTimeoutSpy, 0, {
args: [
'https://boards.4chan.org/v/index.rss',
{ method: 'get' },
5000,
],
returned: new Promise((resolve) => {
resolve(new Response(response, { status: 200 }));
}),
});
assertSpyCalls(fetchWithTimeoutSpy, 1);
});

View File

@@ -3,6 +3,7 @@ import { ISourceOptionsGoogleNews } from './sources/googlenews.ts';
import { ISourceOptionsStackOverflow } from './sources/stackoverflow.ts';
export type TSourceType =
| 'fourchan'
| 'github'
| 'googlenews'
| 'lemmy'
@@ -32,6 +33,7 @@ export interface ISource {
}
export interface ISourceOptions {
fourchan?: string;
github?: ISourceOptionsGithub;
googlenews?: ISourceOptionsGoogleNews;
lemmy?: string;