import { Node } from "prosemirror-model";
import { findMatches } from "./findMatches";

/**
 * A TextRun is a contiguous "run" of text: a sequence of ProseMirror text
 * nodes that sit end to end with no other node between them. A single run may
 * hold several text nodes, because marks split otherwise continuous text into
 * separate text nodes.
 */
export interface TextRun {
  /** ProseMirror position of the first character of the first text node. */
  startPos: number;
  /**
   * The text nodes that make up the run, in traversal order, with no other
   * inline or block node between them.
   */
  textNodes: Node[];
}

/**
 * One match reported by `getMatchesForEachRegex`.
 *
 * `Types` is the union of spec type names a match may carry.
 */
export interface Matches<Types extends string = string> {
  /** Index into the searched text where the match begins (inclusive). */
  start: number;
  /** Index into the searched text where the match ends (exclusive). */
  end: number;
  /** The `type` of the spec that produced this match. */
  type: Types;
  /** The matched text, i.e. `text.substring(start, end)`. */
  content: string;
}

// U+2009 THIN SPACE.
const THIN_SPACE = "\u2009";
// Spec that matches every thin space. getMatchesForEachRegex passes it to
// findMatches ahead of the caller's specs and then discards its matches.
// NOTE(review): presumably this keeps thin spaces from being claimed by other
// specs -- confirm against findMatches.
const THIN_SPACE_REGEX = {
  type: "THIN_SPACE" as const,
  // The "g" flag makes this RegExp stateful (lastIndex); the single instance
  // is shared by every call.
  regex: new RegExp(THIN_SPACE, "g"),
  skipFirstGroup: false,
};

/**
 * Breaks a ProseMirror Node down into TextRuns, where each TextRun is a list
 * of contiguous text nodes (text nodes connected end to end without a block or
 * other inline node separating them).
 *
 * @param nodeTop - The node whose descendants are scanned.
 * @param nodePos - Position of `nodeTop`; each run's `startPos` is computed as
 *   `nodePos + 1 + pos`, where `pos` is the offset reported by
 *   `nodeTop.descendants` (assumes `nodePos` is the position directly before
 *   `nodeTop`, so `+ 1` steps inside it).
 * @returns The runs in traversal order. Empty when there are no text nodes.
 */
export function getTextRunsForNode(nodeTop: Node, nodePos: number): TextRun[] {
  const textRuns: TextRun[] = [];
  let startPos: number | null = null;
  let textNodes: Node[] = [];

  // Close the run being collected, if any. The check is an explicit null
  // comparison rather than truthiness: 0 is a legitimate start position (e.g.
  // nodePos === -1 with the first text node at offset 0) and must not be
  // mistaken for "no run in progress".
  const flushRun = (): void => {
    if (startPos === null) return;
    textRuns.push({ startPos, textNodes });
    startPos = null;
    textNodes = [];
  };

  nodeTop.descendants((n, pos) => {
    if (n.isText) {
      // The first text node of a run fixes the run's start position.
      if (startPos === null) startPos = nodePos + 1 + pos;
      textNodes.push(n);
    } else {
      // Any non-text node ends the current run.
      flushRun();
    }
  });

  // Flush any queued text nodes into one last TextRun.
  flushRun();

  return textRuns;
}

/**
 * Collects the matches of every spec in `MarkSpecs` against `text`.
 *
 * `findMatches` is invoked with the thin-space spec first, followed by the
 * caller's specs; matches of type "THIN_SPACE" are left out of the result.
 *
 * @param text - The string to search.
 * @param MarkSpecs - Specs forwarded to `findMatches`.
 * @param trim - When true (the default), whitespace at either edge of a match
 *   is excluded by moving `start` forward and `end` backward. A match made up
 *   entirely of whitespace is kept, with empty `content`.
 * @returns One entry per remaining match, in the order `findMatches` yields
 *   them, with `content` set to `text.substring(start, end)`.
 */
export function getMatchesForEachRegex<Types extends string = string>(
  text: string,
  MarkSpecs: { type: Types; regex: RegExp; skipFirstGroup: boolean }[],
  trim = true,
): Matches<Types>[] {
  const whitespace = /\s/;
  const isSpaceAt = (index: number): boolean =>
    whitespace.test(text.charAt(index));

  const found = findMatches<Types | "THIN_SPACE">(
    text,
    THIN_SPACE_REGEX,
    ...MarkSpecs,
  );

  const collected: Matches<Types>[] = [];
  for (const match of found) {
    // Thin-space matches are only an input to findMatches, never an output.
    if (match.type === "THIN_SPACE") continue;

    let from = match.start;
    let to = match.end;
    if (trim) {
      // Shrink the span from both sides until it no longer starts or ends
      // with whitespace (or becomes empty).
      while (from < to && isSpaceAt(from)) from += 1;
      while (from < to && isSpaceAt(to - 1)) to -= 1;
    }

    collected.push({
      type: match.type,
      start: from,
      end: to,
      content: text.substring(from, to),
    });
  }
  return collected;
}
