actual/node_modules/@docusaurus/utils/lib/markdownUtils.js

"use strict";
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.writeMarkdownHeadingId = exports.parseMarkdownString = exports.parseMarkdownContentTitle = exports.parseFrontMatter = exports.createExcerpt = exports.parseMarkdownHeadingId = void 0;
const tslib_1 = require("tslib");
const logger_1 = tslib_1.__importDefault(require("@docusaurus/logger"));
const gray_matter_1 = tslib_1.__importDefault(require("gray-matter"));
const slugger_1 = require("./slugger");
// Some utilities for parsing Markdown content. These things are only used on
// server-side when we infer metadata like `title` and `description` from the
// content. Most parsing is still done in MDX through the mdx-loader.
/**
 * Parses a custom ID from the end of a heading. The ID can contain any
 * characters except `{#` and `}`.
 *
 * @param heading e.g. `## Some heading {#some-heading}` where the last
 * character must be `}` for the ID to be recognized
 * @returns An object where `text` is the heading without the trailing
 * `{#id}` marker (and without the whitespace right before it) and `id` is the
 * parsed ID. When no ID is found, `text` is the heading unchanged and `id` is
 * `undefined`.
 */
function parseMarkdownHeadingId(heading) {
    const customHeadingIdRegex = /\s*\{#(?<id>(?:.(?!\{#|\}))*.)\}$/;
    const matches = customHeadingIdRegex.exec(heading);
    if (!matches) {
        return { text: heading, id: undefined };
    }
    return {
        // The regex is anchored at the end of the string, so everything before
        // the match is the heading text. Cutting by index (instead of replacing
        // the matched substring) stays correct when the same `{#id}` text also
        // appears earlier in the heading.
        text: heading.slice(0, matches.index),
        id: matches.groups.id,
    };
}
exports.parseMarkdownHeadingId = parseMarkdownHeadingId;
// TODO: Find a better way to do so, possibly by compiling the Markdown content,
// stripping out HTML tags and obtaining the first line.
/**
 * Creates an excerpt of a Markdown file: the first contentful line, with most
 * Markdown syntax stripped. This function will:
 *
 * - Ignore h1 headings (setext or atx)
 * - Ignore import/export
 * - Ignore code blocks
 *
 * And for the first contentful line, it will strip away most Markdown
 * syntax, including HTML tags, emphasis, links (keeping the text), etc.
 *
 * Both LF and CRLF line endings are accepted.
 *
 * @param fileString Markdown content
 * @returns The cleaned first contentful line, or `undefined` when no line
 * yields any text
 */
function createExcerpt(fileString) {
    const fileLines = fileString
        .trimStart()
        // Remove Markdown alternate title
        .replace(/^[^\n]*\n[=]+/g, '')
        // Split on LF or CRLF. Leaving a trailing `\r` on each line would make
        // the blank line that ends an import block look non-empty, so the
        // import state would never reset and all later content would be
        // skipped.
        .split(/\r?\n/);
    let inCode = false;
    let inImport = false;
    let lastCodeFence = '';
    for (const fileLine of fileLines) {
        // An empty line marks the end of an import/export block.
        if (fileLine === '' && inImport) {
            inImport = false;
        }
        // Skip empty line.
        if (!fileLine.trim()) {
            continue;
        }
        // Skip import/export declaration, including its continuation lines.
        if ((/^(?:import|export)\s.*/.test(fileLine) || inImport) && !inCode) {
            inImport = true;
            continue;
        }
        // Skip code block line.
        if (fileLine.trim().startsWith('```')) {
            const codeFence = fileLine.trim().match(/^`+/)[0];
            if (!inCode) {
                inCode = true;
                lastCodeFence = codeFence;
                // If we are in a ````-fenced block, all ``` would be plain text instead
                // of fences
            }
            else if (codeFence.length >= lastCodeFence.length) {
                inCode = false;
            }
            continue;
        }
        else if (inCode) {
            continue;
        }
        const cleanedLine = fileLine
            // Remove HTML tags.
            .replace(/<[^>]*>/g, '')
            // Remove Title headers
            .replace(/^#[^#]+#?/gm, '')
            // Remove Markdown + ATX-style headers
            .replace(/^#{1,6}\s*(?<text>[^#]*)\s*#{0,6}/gm, '$1')
            // Remove emphasis.
            .replace(/(?<opening>[*_]{1,3})(?<text>.*?)\1/g, '$2')
            // Remove strikethroughs.
            .replace(/~~(?<text>\S.*\S)~~/g, '$1')
            // Remove images.
            .replace(/!\[(?<alt>.*?)\][[(].*?[\])]/g, '$1')
            // Remove footnotes.
            .replace(/\[\^.+?\](?:: .*$)?/g, '')
            // Remove inline links.
            .replace(/\[(?<alt>.*?)\][[(].*?[\])]/g, '$1')
            // Remove inline code.
            .replace(/`(?<text>.+?)`/g, '$1')
            // Remove blockquotes.
            .replace(/^\s{0,3}>\s?/g, '')
            // Remove admonition definition.
            .replace(/:::.*/, '')
            // Remove Emoji names within colons include preceding whitespace.
            .replace(/\s?:(?:::|[^:\n])+:/g, '')
            // Remove custom Markdown heading id.
            .replace(/\{#*[\w-]+\}/, '')
            .trim();
        // A line may clean down to nothing (e.g. an h1 heading); keep looking.
        if (cleanedLine) {
            return cleanedLine;
        }
    }
    return undefined;
}
exports.createExcerpt = createExcerpt;
/**
 * Splits a raw Markdown file into its front matter and its body by delegating
 * to gray-matter. Note that gray-matter accepts TOML and other markup
 * languages as well.
 *
 * @param markdownFileContent Raw content of the Markdown file
 * @returns `frontMatter` is the `data` reported by gray-matter and `content`
 * is the remaining body with surrounding whitespace trimmed
 * @throws Throws when gray-matter throws. e.g.:
 * ```md
 * ---
 * foo: : bar
 * ---
 * ```
 */
function parseFrontMatter(markdownFileContent) {
    const parsed = (0, gray_matter_1.default)(markdownFileContent);
    const frontMatter = parsed.data;
    const content = parsed.content.trim();
    return { frontMatter, content };
}
exports.parseFrontMatter = parseFrontMatter;
/**
 * Unwraps a title that both starts and ends with a backtick by dropping its
 * first and last character (`` `config.js` `` => `config.js`). Any other title
 * is returned as-is.
 */
function toTextContentTitle(contentTitle) {
    const isWrappedInBackticks = contentTitle.startsWith('`') && contentTitle.endsWith('`');
    return isWrappedInBackticks
        ? contentTitle.substring(1, contentTitle.length - 1)
        : contentTitle;
}
/**
 * Looks for an h1 title (atx `# Title` or setext `Title` + `===`) at the top
 * of raw Markdown content that no longer contains front matter, so it can be
 * used as metadata.
 *
 * The search stops at the first contentful paragraph; leading `import` blocks
 * are skipped.
 *
 * Conversion of the title to plain text is deliberately minimal, because the
 * result is only a fallback for when `frontMatter.title` is not provided: a
 * trailing heading ID / closing `#` sequence (atx) or `=` run (setext) is
 * dropped, and inline code is unwrapped (``# `config.js` `` => `config.js`).
 *
 * @param contentUntrimmed Markdown content without front matter
 * @param options When `options.removeContentTitle` is true, the matched title
 * is removed from the returned `content`
 * @returns The trimmed content and the detected title (`undefined` when no
 * title is found)
 */
function parseMarkdownContentTitle(contentUntrimmed, options) {
    const shouldRemoveTitle = options?.removeContentTitle ?? false;
    const content = contentUntrimmed.trim();
    // We only need to detect import statements that will be parsed by MDX as
    // `import` nodes, as broken syntax can't render anyways. That means any block
    // that has `import` at the very beginning and surrounded by empty lines.
    const leadingImportsRegex = /^(?:import\s(?:.|\r?\n(?!\r?\n))*(?:\r?\n){2,})*/;
    const importFreeContent = content.replace(leadingImportsRegex, '').trim();
    const atxMatch = /^#[ \t]+(?<title>[^ \t].*)(?:\r?\n|$)/.exec(importFreeContent);
    const setextMatch = /^(?<title>.*)\r?\n=+(?:\r?\n|$)/.exec(importFreeContent);
    // The atx form wins when both forms match.
    const matchedTitle = atxMatch ?? setextMatch;
    if (!matchedTitle) {
        return { content, contentTitle: undefined };
    }
    let remainingContent = content;
    if (shouldRemoveTitle) {
        remainingContent = content.replace(matchedTitle[0], '');
    }
    remainingContent = remainingContent.trim();
    // Strip the trailing decoration that belongs to the heading syntax.
    const rawTitle = atxMatch
        ? atxMatch.groups.title.trim().replace(/\s*(?:\{#*[\w-]+\}|#+)$/, '')
        : setextMatch.groups.title.trim().replace(/\s*=+$/, '');
    return {
        content: remainingContent,
        contentTitle: toTextContentTitle(rawTitle).trim(),
    };
}
exports.parseMarkdownContentTitle = parseMarkdownContentTitle;
/**
 * Runs the whole parsing pipeline on a raw Markdown file: front matter first,
 * then the content title, then the excerpt of the resulting content.
 *
 * @param markdownFileContent Raw content of the Markdown file
 * @param options Forwarded to `parseMarkdownContentTitle`
 * @returns `frontMatter`, `content`, `contentTitle` and `excerpt`
 * @throws Rethrows any error raised while parsing (after logging it), usually
 * caused by invalid front matter syntax.
 */
function parseMarkdownString(markdownFileContent, options) {
    try {
        const frontMatterResult = parseFrontMatter(markdownFileContent);
        const titleResult = parseMarkdownContentTitle(frontMatterResult.content, options);
        return {
            frontMatter: frontMatterResult.frontMatter,
            content: titleResult.content,
            contentTitle: titleResult.contentTitle,
            excerpt: createExcerpt(titleResult.content),
        };
    }
    catch (error) {
        // Log a hint for the most common cause, then let the caller handle it.
        logger_1.default.error(`Error while parsing Markdown front matter.
This can happen if you use special characters in front matter values (try using double quotes around that value).`);
        throw error;
    }
}
exports.parseMarkdownString = parseMarkdownString;
/**
 * Replaces every inline Markdown link `[text](target)` in the line with just
 * its `text`.
 */
function unwrapMarkdownLinks(line) {
    const inlineLinkRegex = /\[(?<alt>[^\]]+)\]\([^)]+\)/g;
    // `$<alt>` inserts the captured link text verbatim.
    return line.replace(inlineLinkRegex, '$<alt>');
}
/**
 * Appends a ` {#slug}` marker to a heading line.
 *
 * The slug is produced by `slugger.slug` from the heading text with Markdown
 * links unwrapped and surrounding whitespace removed. Trailing whitespace of
 * the line is dropped before the marker is appended.
 *
 * @param line Heading line, starting with its `#` characters
 * @param slugger Object exposing `slug(value, options)`
 * @param maintainCase Forwarded to the slugger as `options.maintainCase`
 */
function addHeadingId(line, slugger, maintainCase) {
    // The leading run of `#` characters (possibly empty) is the heading marker.
    const hashes = /^#*/.exec(line)[0];
    const text = line.slice(hashes.length).trimEnd();
    const slugSource = unwrapMarkdownLinks(text).trim();
    const slug = slugger.slug(slugSource, { maintainCase });
    return `${hashes}${text} {#${slug}}`;
}
/**
 * Takes Markdown content, returns new content with heading IDs written.
 * Respects existing IDs (unless `overwrite=true`) and never generates colliding
 * IDs (through the slugger).
 *
 * Only lines starting with `##` outside of ``` fenced blocks are processed;
 * h1 headings are left alone. Lines are separated on `\n`; a trailing `\r`
 * (CRLF content) is ignored while parsing and kept on rewritten lines, so
 * existing IDs are recognized and line endings are preserved.
 *
 * @param content Markdown content
 * @param options `maintainCase` is forwarded to the slugger; `overwrite`
 * regenerates IDs even for headings that already have one. Both default to
 * `false`.
 * @returns The content with heading IDs appended
 */
function writeMarkdownHeadingId(content, options = { maintainCase: false, overwrite: false }) {
    const { maintainCase = false, overwrite = false } = options;
    const lines = content.split('\n');
    const slugger = (0, slugger_1.createSlugger)();
    // Separates a line from its trailing carriage return, if any. Without this,
    // a CRLF heading such as `## Foo {#bar}\r` does not end with `}`, so its ID
    // would go unnoticed and a second ID would be appended.
    const splitCarriageReturn = (line) => line.endsWith('\r') ? [line.slice(0, -1), '\r'] : [line, ''];
    // If we can't overwrite existing slugs, make sure other headings don't
    // generate colliding slugs by first marking these slugs as occupied
    if (!overwrite) {
        for (const line of lines) {
            const [lineText] = splitCarriageReturn(line);
            const { id } = parseMarkdownHeadingId(lineText);
            if (id) {
                slugger.slug(id);
            }
        }
    }
    let inCode = false;
    return lines
        .map((line) => {
        if (line.startsWith('```')) {
            inCode = !inCode;
            return line;
        }
        // Ignore h1 headings, as we don't create anchor links for those
        if (inCode || !line.startsWith('##')) {
            return line;
        }
        const [lineText, carriageReturn] = splitCarriageReturn(line);
        const parsedHeading = parseMarkdownHeadingId(lineText);
        // Do not process if id is already there
        if (parsedHeading.id && !overwrite) {
            return line;
        }
        const rewritten = addHeadingId(parsedHeading.text, slugger, maintainCase);
        return `${rewritten}${carriageReturn}`;
    })
        .join('\n');
}
exports.writeMarkdownHeadingId = writeMarkdownHeadingId;
//# sourceMappingURL=markdownUtils.js.map