mirror of
https://github.com/feeddeck/feeddeck.git
synced 2026-04-28 09:57:47 -05:00
Instead of using an `import_map.json` file to define the versions for dependencies, they are now defined directly within the import. Since the `import_map.json` file should not be used anymore and instead a `deno.json` file per function should be used, we decided to define them directly with the code. The overhead compared to a `deno.json` file per function shouldn't be that large and it makes using functions in a self-hosted setup easier.
236 lines
7.1 KiB
TypeScript
236 lines
7.1 KiB
TypeScript
import { SupabaseClient } from "jsr:@supabase/supabase-js@2";
|
|
import { FeedEntry } from "https://deno.land/x/rss@1.0.0/src/types/mod.ts";
|
|
import { unescape } from "https://raw.githubusercontent.com/lodash/lodash/4.17.21-es/lodash.js";
|
|
import { Redis } from "https://deno.land/x/redis@v0.32.0/mod.ts";
|
|
|
|
import { ISource } from "../models/source.ts";
|
|
import { IItem } from "../models/item.ts";
|
|
import { IProfile } from "../models/profile.ts";
|
|
import { utils } from "../utils/index.ts";
|
|
import { feedutils } from "./utils/index.ts";
|
|
|
|
export const getGooglenewsFeed = async (
|
|
_supabaseClient: SupabaseClient,
|
|
redisClient: Redis | undefined,
|
|
_profile: IProfile,
|
|
source: ISource,
|
|
feedData: string | undefined,
|
|
): Promise<{ source: ISource; items: IItem[] }> => {
|
|
if (!source.options?.googlenews || !source.options?.googlenews?.type) {
|
|
throw new feedutils.FeedValidationError("Invalid source options");
|
|
}
|
|
|
|
if (
|
|
source.options.googlenews.type === "url" &&
|
|
source.options.googlenews.url
|
|
) {
|
|
/**
|
|
* If the user selected type is url, we check if the url already points to
|
|
* the Google News RSS feed, if not we convert the url to the RSS feed url
|
|
* and use it later.
|
|
*/
|
|
if (
|
|
source.options.googlenews.url.startsWith("https://news.google.com/rss")
|
|
) {
|
|
/**
|
|
* Do nothing, since the user already provided an url to the Google News
|
|
* RSS feed.
|
|
*/
|
|
} else if (
|
|
source.options.googlenews.url.startsWith("https://news.google.com")
|
|
) {
|
|
source.options.googlenews.url = `https://news.google.com/rss${source.options.googlenews.url.replace(
|
|
"https://news.google.com",
|
|
"",
|
|
)}`;
|
|
}
|
|
} else if (
|
|
source.options.googlenews.type === "search" &&
|
|
source.options.googlenews.search &&
|
|
source.options.googlenews.ceid &&
|
|
source.options.googlenews.gl &&
|
|
source.options.googlenews.hl
|
|
) {
|
|
/**
|
|
* If the user selected type is earch we construct the RSS feed url with the
|
|
* search term and the user selected language and region.
|
|
*/
|
|
source.options.googlenews.url = `https://news.google.com/rss/search?q=${source.options.googlenews.search}&hl=${source.options.googlenews.hl}&gl=${source.options.googlenews.gl}&ceid=${source.options.googlenews.ceid}`;
|
|
} else {
|
|
throw new feedutils.FeedValidationError("Invalid source options");
|
|
}
|
|
|
|
/**
|
|
* Get the RSS for the provided `googlenews` url and parse it. If a feed
|
|
* doesn't contains a title we return an error.
|
|
*/
|
|
const feed = await feedutils.getAndParseFeed(
|
|
source.options.googlenews.url,
|
|
source,
|
|
feedData,
|
|
);
|
|
|
|
if (!feed.title.value) {
|
|
throw new Error("Invalid feed");
|
|
}
|
|
|
|
/**
|
|
* Generate a source id based on the user id, column id and the normalized
|
|
* `stackoverflow` url. Besides that we also set the source type to
|
|
* `stackoverflow` and set the title and link for the source.
|
|
*/
|
|
if (source.id === "") {
|
|
source.id = await generateSourceId(
|
|
source.userId,
|
|
source.columnId,
|
|
source.options.googlenews.url,
|
|
);
|
|
}
|
|
source.type = "googlenews";
|
|
source.title = feed.title.value;
|
|
if (feed.links.length > 0) {
|
|
source.link = feed.links[0];
|
|
}
|
|
source.icon = undefined;
|
|
|
|
/**
|
|
* Now that the source does contain all the required information we can start
|
|
* to generate the items for the source, by looping over all the feed entries.
|
|
*/
|
|
const items: IItem[] = [];
|
|
|
|
for (const [index, entry] of feed.entries.entries()) {
|
|
if (skipEntry(index, entry, source.updatedAt || 0)) {
|
|
continue;
|
|
}
|
|
|
|
let author = undefined;
|
|
// deno-lint-ignore no-explicit-any
|
|
if (entry.source && (entry.source as any).value) {
|
|
// deno-lint-ignore no-explicit-any
|
|
author = (entry.source as any).value;
|
|
}
|
|
|
|
let media = undefined;
|
|
if (redisClient) {
|
|
media = await getMedia(redisClient, entry);
|
|
}
|
|
|
|
/**
|
|
* Create the item object and add it to the `items` array.
|
|
*/
|
|
items.push({
|
|
id: await generateItemId(source.id, entry.id),
|
|
userId: source.userId,
|
|
columnId: source.columnId,
|
|
sourceId: source.id,
|
|
title: entry.title!.value!,
|
|
link: entry.links[0].href!,
|
|
media: media,
|
|
description: entry.description?.value
|
|
? unescape(entry.description.value)
|
|
: undefined,
|
|
author: author,
|
|
publishedAt: Math.floor(entry.published!.getTime() / 1000),
|
|
});
|
|
}
|
|
|
|
return { source, items };
|
|
};
|
|
|
|
/**
|
|
* `skipEntry` is used to determin if an entry should be skipped or not. When a
|
|
* entry in the RSS feed is skipped it will not be added to the database. An
|
|
* entry will be skipped when
|
|
* - it is not within the first 50 entries of the feed, because we only keep the
|
|
* last 50 items of each source in our delete logic.
|
|
* - the entry does not contain a title, a link or a published date.
|
|
* - the published date of the entry is older than the last update date of the
|
|
* source minus 10 seconds.
|
|
*/
|
|
const skipEntry = (
|
|
index: number,
|
|
entry: FeedEntry,
|
|
sourceUpdatedAt: number,
|
|
): boolean => {
|
|
if (index === 50) {
|
|
return true;
|
|
}
|
|
|
|
if (
|
|
!entry.title?.value ||
|
|
entry.links.length === 0 ||
|
|
!entry.links[0].href ||
|
|
!entry.published
|
|
) {
|
|
return true;
|
|
}
|
|
|
|
if (Math.floor(entry.published.getTime() / 1000) <= sourceUpdatedAt - 10) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
};
|
|
|
|
/**
|
|
* `generateSourceId` generates a unique source id based on the user id, column
|
|
* id and the link of the RSS feed. We use the MD5 algorithm for the link to
|
|
* generate the id.
|
|
*/
|
|
const generateSourceId = async (
|
|
userId: string,
|
|
columnId: string,
|
|
link: string,
|
|
): Promise<string> => {
|
|
return `googlenews-${userId}-${columnId}-${await utils.md5(link)}`;
|
|
};
|
|
|
|
/**
|
|
* `generateItemId` generates a unique item id based on the source id and the
|
|
* identifier of the item. We use the MD5 algorithm for the identifier, which
|
|
* can be the link of the item or the id of the item.
|
|
*/
|
|
const generateItemId = async (
|
|
sourceId: string,
|
|
identifier: string,
|
|
): Promise<string> => {
|
|
return `${sourceId}-${await utils.md5(identifier)}`;
|
|
};
|
|
|
|
/**
|
|
* `getMedia` returns the icon of the source if it exists. The icon can then be
|
|
* used within the `item.media` field. If we are not able to get an icon for the
|
|
* source we return `undefined`.
|
|
*
|
|
* To get the item we have to use the `getFavicon` function against the
|
|
* `source.url` of the `entry`. Since this function is very expensive we cache
|
|
* the result in Redis and check if an icon for the `source.url` is already
|
|
* cached before we call the `getFavicon` function.
|
|
*/
|
|
const getMedia = async (
|
|
redisClient: Redis,
|
|
entry: FeedEntry,
|
|
): Promise<string | undefined> => {
|
|
try {
|
|
if (entry.source?.url) {
|
|
const cacheKey = `scraper-googlenews-${entry.source?.url}`;
|
|
|
|
const cachedMediaURL = await redisClient.get(cacheKey);
|
|
if (cachedMediaURL) {
|
|
return cachedMediaURL;
|
|
}
|
|
|
|
const favicon = await feedutils.getFavicon(entry.source?.url);
|
|
if (favicon && favicon.url.startsWith("https://")) {
|
|
await redisClient.set(cacheKey, favicon.url);
|
|
return favicon.url;
|
|
}
|
|
}
|
|
|
|
return undefined;
|
|
} catch (_) {
|
|
return undefined;
|
|
}
|
|
};
|