import { smithWaterman } from "@/lib/align";
import type { TextContent, TextItem } from "pdfjs-dist/types/src/display/api";

// Replaces typographic (curly) single and double quotes with their ASCII
// counterparts so that quote style does not matter when comparing text.
function normalizeQuotes(str: string) {
	const straightSingles = str.replace(/[\u2018\u2019]/g, "'");
	return straightSingles.replace(/[\u201C\u201D]/g, '"');
}

// Prepares text for alignment: lower-cases it (locale-aware), turns every
// whitespace character into a plain space (one-for-one, runs are not
// collapsed) and straightens curly quotes.
const cleanTextForAlignment = (text: string) => {
	const lowered = text.toLocaleLowerCase();
	const spacesOnly = lowered.replace(/\s/g, " ");
	return normalizeQuotes(spacesOnly);
};

// Strips characters that do not render visibly (control characters, soft
// hyphens, zero-width and formatting characters, byte order marks and
// non-breaking spaces) from the given string.
function removeInvisible(str: string) {
	// Inclusive UTF-16 code unit ranges that get dropped.
	const invisibleRanges: ReadonlyArray<readonly [number, number]> = [
		[0x0000, 0x001f], // Control characters
		[0x007f, 0x009f], // Control characters
		[0x00ad, 0x00ad], // Soft hyphen
		[0x200b, 0x200f], // Zero-width characters
		[0x2060, 0x206f], // Unicode formatting characters
		[0xfeff, 0xfeff], // Byte order mark
		[0x00a0, 0x00a0], // Non-breaking space
	];

	let visible = "";
	for (let i = 0; i < str.length; i++) {
		const unit = str.charCodeAt(i);
		const isInvisible = invisibleRanges.some(
			([low, high]) => unit >= low && unit <= high,
		);
		if (!isInvisible) {
			visible += str.charAt(i);
		}
	}
	return visible;
}

// Shape of a highlight request.
// NOTE(review): nothing in this file consumes this type, so the field notes
// below are inferred from the names only — confirm against the callers.
export type HighlightProps = {
	textToHighlight: {
		// Presumably the text to highlight, or its beginning when `textEnd` is given — TODO confirm
		textStart: string;
		// Optional; presumably the closing text of a gapped highlight (cf. computeGappedHighlight) — TODO confirm
		textEnd?: string;
	};
	// Presumably the indices of the pages whose text should be searched — TODO confirm
	pageIndicesToSearch: number[];
};

// Location of a highlighted range expressed in terms of pdf.js text items
// ("spans"). This is what computeHighlight and computeGappedHighlight return.
export interface HighlightResult {
	// Span that contains the first highlighted character
	firstSpan: {
		// The text item itself
		item: TextItem;
		// `pageIndex` of the page entry the item was taken from
		pageIndex: number;
		// Position of the item inside that page's `items` array
		itemIndex: number;
	};
	// Span that contains the last highlighted character
	lastSpan: {
		// The text item itself
		item: TextItem;
		// `pageIndex` of the page entry the item was taken from
		pageIndex: number;
		// Position of the item inside that page's `items` array
		itemIndex: number;
	};
	// Offset inside `firstSpan.item.str` of the first highlighted character
	firstSpanCharIdx: number;
	// Offset inside `lastSpan.item.str` of the last highlighted character (inclusive)
	lastSpanCharIdx: number;
}

/**
 * Locates `textToHighlight` in the text of the given pages and reports the
 * text items (spans) in which the match starts and ends.
 *
 * All non-empty text items are concatenated (without separators) into one
 * haystack string, the cleaned highlight text is aligned against the cleaned
 * haystack with `smithWaterman`, and the first and last aligned non-space
 * characters are mapped back to the spans that contain them.
 *
 * @param textToHighlight - Text to look for. Invisible characters are removed
 *   and surrounding/repeated whitespace is ignored before aligning.
 * @param pageTexts - Pages to search; their items are concatenated in the
 *   order given.
 * @returns The spans containing the first and last matched characters plus
 *   the offsets of those characters inside each span's `str` (the last
 *   offset is inclusive).
 * @throws Error with message "No search results found" when the alignment
 *   yields no usable character or the match cannot be mapped to a span.
 */
export const computeHighlight = (
	textToHighlight: string,
	pageTexts: Array<{
		pageText: TextContent;
		pageIndex: number;
	}>,
): HighlightResult => {
	// Collect every non-empty text item together with where it came from.
	const spans: Array<{
		item: TextItem;
		pageIndex: number;
		itemIndex: number;
	}> = [];
	for (const { pageText, pageIndex } of pageTexts) {
		pageText.items.forEach((item, itemIndex) => {
			if ("str" in item && item.str.length > 0) {
				spans.push({ item, pageIndex, itemIndex });
			}
		});
	}

	const haystack = spans.map(({ item }) => item.str).join("");

	const [highlightAlignment, haystackAlignment] = smithWaterman(
		// We don't care about surrounding spaces for the highlighted text
		cleanTextForAlignment(removeInvisible(textToHighlight))
			.trim()
			.replace(/\s+/g, " "),
		// However, we do care about surrounding spaces for the haystack because they affect span positioning
		cleanTextForAlignment(haystack),
	);

	// Alignment positions of the first and last non-space highlight characters.
	// Leading/trailing spaces are skipped because they can mess up the alignment.
	const firstCharAlignmentIdx = highlightAlignment.findIndex(
		(x) => x.type === "char" && x.char !== " ",
	);
	const lastCharAlignmentIdx = highlightAlignment.findLastIndex(
		(x) => x.type === "char" && x.char !== " ",
	);

	// Without this guard an empty alignment would make the forward scan below
	// start at index -1 and crash on an undefined entry.
	if (firstCharAlignmentIdx === -1 || lastCharAlignmentIdx === -1) {
		throw new Error("No search results found");
	}

	// Translate alignment positions into haystack offsets. A position may be a
	// gap on the haystack side, so scan inwards to the nearest real character.
	// NOTE(review): `originalIndex` is used as an offset into `haystack`, which
	// presumes the cleaned haystack has the same length as the raw one;
	// lower-casing can change the length for a few characters (e.g. U+0130) —
	// confirm this is acceptable.
	let firstCharIdx = -1;
	let lastCharIdx = -1;

	for (let i = firstCharAlignmentIdx; i < haystackAlignment.length; i++) {
		const pos = haystackAlignment[i];
		if (pos.type === "char") {
			firstCharIdx = pos.originalIndex;
			break;
		}
	}
	for (let i = lastCharAlignmentIdx; i >= 0; i--) {
		const pos = haystackAlignment[i];
		if (pos.type === "char") {
			lastCharIdx = pos.originalIndex;
			break;
		}
	}

	if (firstCharIdx === -1 || lastCharIdx === -1) {
		throw new Error("No search results found");
	}

	// Maps a haystack offset to the span containing it and the offset inside
	// that span's text; returns undefined when the offset is past the last span.
	const locate = (charIdx: number) => {
		let spanStart = 0;
		for (const span of spans) {
			const spanEnd = spanStart + span.item.str.length;
			if (charIdx < spanEnd) {
				return { span, offset: charIdx - spanStart };
			}
			spanStart = spanEnd;
		}
		return undefined;
	};

	const first = locate(firstCharIdx);
	const last = locate(lastCharIdx);

	// Never hand back an undefined span disguised as a valid result.
	if (first === undefined || last === undefined) {
		throw new Error("No search results found");
	}

	return {
		firstSpan: first.span,
		lastSpan: last.span,
		firstSpanCharIdx: first.offset,
		lastSpanCharIdx: last.offset,
	};
};

/**
 * Computes a highlight whose beginning and end are given as two separate
 * snippets: the result starts where `startingText` is found and ends where
 * `endingText` is found. Each snippet is located independently with
 * `computeHighlight` over the same pages.
 *
 * @param startingText - Text marking the start of the highlight.
 * @param endingText - Text marking the end of the highlight.
 * @param pageTexts - Pages to search.
 * @returns The first span/offset of the starting match combined with the
 *   last span/offset of the ending match.
 */
export const computeGappedHighlight = (
	startingText: string,
	endingText: string,
	pageTexts: Array<{
		pageText: TextContent;
		pageIndex: number;
	}>,
): HighlightResult => {
	// Only the leading edge of the starting match is needed...
	const { firstSpan, firstSpanCharIdx } = computeHighlight(
		startingText,
		pageTexts,
	);
	// ...and only the trailing edge of the ending match.
	const { lastSpan, lastSpanCharIdx } = computeHighlight(endingText, pageTexts);

	return { firstSpan, lastSpan, firstSpanCharIdx, lastSpanCharIdx };
};
