Files
feeddeck/supabase/functions/_shared/feed/reddit.ts
Rico Berger 9e59439226 [core] Add Tests for Sources (#98)
This commit adds tests for all available sources.

This commit also fixes the parsing of Atom feeds for the RSS source,
where the `dc:date` field must be used for the `publishedAt` field.
2023-12-12 18:50:29 +01:00

219 lines
6.4 KiB
TypeScript

import { SupabaseClient } from '@supabase/supabase-js';
import { parseFeed } from 'rss';
import { Md5 } from 'std/md5';
import { FeedEntry } from 'rss/types';
import { Redis } from 'redis';
import { unescape } from 'lodash';
import { IItem } from '../models/item.ts';
import { ISource } from '../models/source.ts';
import { IProfile } from '../models/profile.ts';
import { utils } from '../utils/index.ts';
/**
 * `isRedditUrl` checks if the provided `url` points to Reddit. A url is
 * considered valid if its hostname is exactly `reddit.com` or a subdomain of
 * it (e.g. `www.reddit.com` or `old.reddit.com`). Hosts which only share the
 * same trailing characters (e.g. `notreddit.com`) are rejected.
 *
 * @param url The absolute url which should be checked.
 * @returns `true` when the hostname of the url belongs to `reddit.com`.
 * @throws {TypeError} When `url` can not be parsed by the `URL` constructor.
 */
export const isRedditUrl = (url: string): boolean => {
  const { hostname } = new URL(url);
  // A plain `endsWith('reddit.com')` would also match look-alike domains, so we
  // require either the exact domain or a `.`-separated subdomain of it.
  return hostname === 'reddit.com' || hostname.endsWith('.reddit.com');
};
/**
 * `getRedditFeed` fetches the RSS feed for the Reddit source defined in
 * `source.options.reddit`, completes the `source` (id, type, title, link) with
 * the information from the feed and converts the feed entries into items.
 *
 * The `reddit` option can be a path starting with `/r/` or `/u/`, or an
 * absolute Reddit url. In both cases the option is rewritten in place to the
 * `.rss` url which is fetched. Note that the passed in `source` object is
 * mutated and returned.
 *
 * @param _supabaseClient Unused by this source.
 * @param _redisClient Unused by this source.
 * @param _profile Unused by this source.
 * @param source The source for which the feed should be fetched.
 * @returns The updated source and the items generated from the feed entries.
 * @throws {Error} `Invalid source options` when the `reddit` option is missing
 * or is neither a `/r/` / `/u/` path nor a Reddit url, and `Invalid feed` when
 * the parsed feed does not contain a title. Errors from `isRedditUrl`, the
 * request and the feed parser are not caught here.
 */
export const getRedditFeed = async (
  _supabaseClient: SupabaseClient,
  _redisClient: Redis | undefined,
  _profile: IProfile,
  source: ISource,
): Promise<{ source: ISource; items: IItem[] }> => {
  if (!source.options?.reddit) {
    throw new Error('Invalid source options');
  }

  /**
   * Normalize the provided `reddit` option to the url of the RSS feed. Every
   * input which is not a `/r/` or `/u/` path or a Reddit url is rejected, so
   * that we never request an arbitrary user provided url.
   */
  if (
    source.options.reddit.startsWith('/r/') ||
    source.options.reddit.startsWith('/u/')
  ) {
    source.options.reddit =
      `https://www.reddit.com${source.options.reddit}.rss`;
  } else if (isRedditUrl(source.options.reddit)) {
    if (!source.options.reddit.endsWith('.rss')) {
      source.options.reddit = `${source.options.reddit}.rss`;
    }
  } else {
    throw new Error('Invalid source options');
  }

  /**
   * Get the RSS feed for the normalized `reddit` url and parse it. If the feed
   * doesn't contain a title we throw an error.
   */
  const response = await utils.fetchWithTimeout(source.options.reddit, {
    method: 'get',
  }, 5000);
  const xml = await response.text();
  utils.log('debug', 'Add source', {
    sourceType: 'reddit',
    requestUrl: source.options.reddit,
    responseStatus: response.status,
  });
  const feed = await parseFeed(xml);

  // Use optional chaining, so that a feed without a title element results in
  // the "Invalid feed" error instead of a TypeError.
  const feedTitle = feed.title?.value;
  if (!feedTitle) {
    throw new Error('Invalid feed');
  }

  /**
   * Generate a source id based on the user id, column id and the normalized
   * `reddit` url, when the source doesn't have an id yet. Besides that we also
   * set the source type to `reddit` and set the title and link for the source.
   */
  if (source.id === '') {
    source.id = generateSourceId(
      source.userId,
      source.columnId,
      source.options.reddit,
    );
  }
  source.type = 'reddit';
  source.title = feedTitle;
  if (feed.links.length > 0) {
    source.link = feed.links[0];
  }

  /**
   * Now that the source does contain all the required information we can start
   * to generate the items for the source, by looping over all the feed entries.
   */
  const items: IItem[] = [];
  for (const [index, entry] of feed.entries.entries()) {
    if (skipEntry(index, entry, source.updatedAt || 0)) {
      continue;
    }

    /**
     * Each item needs a unique id which is generated using the
     * `generateItemId` function. The id is a combination of the source id and
     * the id of the entry or, if the entry does not have an id (missing or
     * empty), the `href` of the first link of the entry.
     */
    const identifier = entry.id || entry.links[0]?.href;
    if (!identifier) {
      continue;
    }
    const itemId = generateItemId(source.id, identifier);

    /**
     * Create the item object and add it to the `items` array. The non-null
     * assertions are safe, because `skipEntry` already skipped all entries
     * without a title, a first link or a published date.
     */
    items.push({
      id: itemId,
      userId: source.userId,
      columnId: source.columnId,
      sourceId: source.id,
      title: entry.title!.value!,
      link: entry.links[0].href!,
      media: getMedia(entry),
      description: getDescription(entry),
      author: entry.author?.name,
      publishedAt: Math.floor(entry.published!.getTime() / 1000),
    });
  }

  return { source, items };
};
/**
 * `MAX_ENTRIES_PER_FEED` is the maximum number of entries which are processed
 * per feed, because we only keep the last 50 items of each source in our
 * delete logic.
 */
const MAX_ENTRIES_PER_FEED = 50;

/**
 * `skipEntry` is used to determine if an entry should be skipped or not. When
 * an entry in the RSS feed is skipped it will not be added to the database. An
 * entry will be skipped when
 * - it is not within the first 50 entries of the feed, because we only keep the
 *   last 50 items of each source in our delete logic.
 * - the entry does not contain a title, a link or a published date.
 * - the published date of the entry is older than the last update date of the
 *   source minus 10 seconds.
 *
 * @param index The zero-based position of the entry within the feed.
 * @param entry The feed entry which should be checked.
 * @param sourceUpdatedAt The last update of the source as Unix timestamp in
 * seconds.
 * @returns `true` when the entry should be skipped.
 */
const skipEntry = (
  index: number,
  entry: FeedEntry,
  sourceUpdatedAt: number,
): boolean => {
  // Use `>=` instead of `===`, so that all entries after the first 50 are
  // skipped and not only the 51st one.
  if (index >= MAX_ENTRIES_PER_FEED) {
    return true;
  }

  if (
    !entry.title?.value ||
    (entry.links.length === 0 || !entry.links[0].href) || !entry.published
  ) {
    return true;
  }

  // The published date is compared in seconds against the last update of the
  // source minus 10 seconds.
  if (Math.floor(entry.published.getTime() / 1000) <= (sourceUpdatedAt - 10)) {
    return true;
  }

  return false;
};
/**
 * `generateSourceId` builds the id of a Reddit source. The id has the form
 * `reddit-<userId>-<columnId>-<hash>`, where `<hash>` is the MD5 hash of the
 * link of the RSS feed.
 */
const generateSourceId = (
  userId: string,
  columnId: string,
  link: string,
): string => {
  const linkHash = new Md5().update(link).toString();
  return `reddit-${userId}-${columnId}-${linkHash}`;
};
/**
 * `generateItemId` builds the id of an item. The id has the form
 * `<sourceId>-<hash>`, where `<hash>` is the MD5 hash of the provided
 * identifier. The identifier can be the id or the link of the item.
 */
const generateItemId = (sourceId: string, identifier: string): string => {
  const identifierHash = new Md5().update(identifier).toString();
  return `${sourceId}-${identifierHash}`;
};
/**
 * `getDescription` returns the unescaped content of a feed entry as
 * description, or `undefined` when the entry has no content. Some Reddit feed
 * items wrap their content in a table, so the opening and closing `table`,
 * `tr` and `td` tags are removed from the description, to improve the
 * rendering in the Flutter app.
 */
const getDescription = (entry: FeedEntry): string | undefined => {
  const rawContent = entry.content?.value;
  if (!rawContent) {
    return undefined;
  }

  // The tags are removed one after another in exactly this order.
  const tableTags = ['<table>', '<tr>', '<td>', '</table>', '</tr>', '</td>'];
  let description = unescape(rawContent);
  for (const tag of tableTags) {
    description = description.replaceAll(tag, '');
  }
  return description;
};
/**
 * `getMedia` returns the url of the `media:thumbnail` field of a feed entry,
 * which is used as media for the item. If the entry has no thumbnail or the
 * thumbnail has no url, `undefined` is returned.
 */
const getMedia = (entry: FeedEntry): string | undefined => {
  // deno-lint-ignore no-explicit-any
  const thumbnail = (entry as any)['media:thumbnail'];
  return thumbnail?.url || undefined;
};