/**
 * Build a case-insensitive regular expression that locates `phrase` in
 * HTML-encoded text.
 *
 * Why not `\b`: a leading word boundary treats letters with diacritics as
 * non-word characters, so `\btest\b` matches not only ` test ` but also the
 * tail of `ótest `, which would place links inside the preceding word.
 * Illustration (link may expire): https://regex101.com/r/ZowXW9/1
 *
 * Instead the phrase is anchored with zero-width assertions:
 *  - before it: start of input, or one of whitespace `,` `.` `:` `;` `"` `'` `>`
 *    (the `>` covers the end of an opening HTML tag);
 *  - after it: end of input, or one of whitespace `,` `.` `:` `;` `"` `'`.
 * Because both assertions consume nothing, the match begins exactly at the
 * phrase and capture group 1 holds the matched phrase text.
 * NOTE(review): an earlier description of this function mentioned offsetting
 * the match by one character via `MATCH_PHRASE_CAPTURING_GROUP_OFFSET`; with a
 * lookbehind no leading character is consumed - confirm callers do not still
 * apply that offset.
 *
 * The phrase is split on single spaces; each word has its regex
 * metacharacters escaped and every `ó` / `Ó` converted to the `&oacute;` /
 * `&Oacute;` entity. Between consecutive words the pattern tolerates any run
 * (including none) of `.`, `,`, `|`, space, or of `-`, whitespace, `&mdash;`,
 * `&ndash;`.
 *
 * @param phrase - Plain-text phrase to search for.
 * @returns The compiled expression, or `null` when the phrase contains no
 * words (empty / spaces only) or the pattern fails to compile.
 */
export const getPhraseRegExp = (phrase: string): RegExp | null => {
    // Zero-width boundaries; see the doc comment for the rationale.
    const leftBoundary = "(?<=[\\s,.:;\"'>]|^)";
    const rightBoundary = "(?=[\\s,.:;\"']|$)";
    // Allow space, `-` and other dashes between words; ignore punctuation '.' and ','.
    // The `|` characters inside `[.|,| ]` are literal pipes, not alternation.
    const wordSeparator = "(([.|,| ]*?)|([\\-\\s]|&mdash;|&ndash;)*?)";

    const words = phrase
        // divide into separate words
        .split(" ")
        // remove empty words produced by leading/trailing/repeated spaces
        .filter((word) => word.length > 0)
        .map((word) =>
            word
                // escape regex special characters
                .replace(/[-–—[\]{}()*+?.,\\^$|#\s]/g, "\\$&")
                // Global replaces: a string pattern would convert only the
                // first "ó"/"Ó" of the word and leave the rest unmatched.
                .replace(/ó/g, "&oacute;")
                .replace(/Ó/g, "&Oacute;")
        );

    // Without any word the capture group would be empty and the expression
    // would match a zero-length string at every boundary.
    if (words.length === 0) {
        return null;
    }

    const pattern = leftBoundary + "(" + words.join(wordSeparator) + ")" + rightBoundary;

    try {
        return new RegExp(pattern, "i");
    } catch (error) {
        console.log(`renderLinkInText - problem with phrase: ${phrase}`, error);
        return null;
    }
};
