import JSZip from "jszip";

/**
 * Extracts the text of every slide in a PPTX file.
 *
 * The file is opened as a zip archive, each `ppt/slides/slide<N>.xml` entry is
 * read and handed to `extractTextFromSlideXml`, and the per-slide results are
 * concatenated with a newline between slides.
 *
 * Slides are emitted in ascending order of the number in their file name.
 * NOTE(review): the file-name number is used as a proxy for slide order; the
 * order shown in the presentation is presumably declared in
 * `ppt/presentation.xml` — confirm whether callers need that instead.
 *
 * @param file - The PPTX file to read.
 * @returns The slide texts separated by newlines, trimmed of surrounding whitespace.
 */
export const extractTextFromPPT = async (file: File): Promise<string> => {
  const zip = new JSZip();
  const arrayBuffer = await file.arrayBuffer();
  const pptxZip = await zip.loadAsync(arrayBuffer);

  // The capture group holds the slide number used for ordering below.
  const slideRegex = /^ppt\/slides\/slide(\d+)\.xml$/;
  const slideNumber = (fileName: string): number =>
    Number(slideRegex.exec(fileName)?.[1] ?? 0);

  // Archive entries are listed in the order they were written to the zip, so
  // sort numerically (slide2 before slide10) to get a stable slide order.
  const slideFiles = Object.keys(pptxZip.files)
    .filter((fileName) => slideRegex.test(fileName))
    .sort((a, b) => slideNumber(a) - slideNumber(b));

  let extractedText = "";
  for (const slideFile of slideFiles) {
    const slideXml = await pptxZip.file(slideFile)?.async("text");
    // Entries that are missing or empty contribute nothing to the output.
    if (slideXml) {
      extractedText += `${extractTextFromSlideXml(slideXml)}\n`;
    }
  }

  return extractedText.trim();
};

/**
 * Extracts the visible text from a single slide's XML.
 *
 * Text is collected paragraph by paragraph (`a:p`). Within a paragraph the
 * contents of its `a:t` elements are concatenated without a separator, because
 * a single word or sentence can be split across several runs and the runs
 * carry their own whitespace; an `a:br` line break contributes one space.
 * Empty paragraphs are skipped and the remaining ones are joined with single
 * spaces.
 *
 * Elements are matched by qualified name, so the DrawingML namespace is
 * expected to be bound to the `a` prefix.
 *
 * @param xml - The slide XML source.
 * @returns The slide's text as one space-separated string, or an empty string
 *   when no text is found.
 */
const extractTextFromSlideXml = (xml: string): string => {
  const parser = new DOMParser();
  const xmlDoc = parser.parseFromString(xml, "application/xml");

  const paragraphTexts: string[] = [];
  for (const paragraph of Array.from(xmlDoc.getElementsByTagName("a:p"))) {
    let paragraphText = "";
    // Walk every descendant in document order so runs and line breaks
    // interleave exactly as they appear in the paragraph.
    for (const element of Array.from(paragraph.getElementsByTagName("*"))) {
      if (element.nodeName === "a:t") {
        paragraphText += element.textContent ?? "";
      } else if (element.nodeName === "a:br") {
        paragraphText += " ";
      }
    }

    const trimmed = paragraphText.trim();
    if (trimmed.length > 0) {
      paragraphTexts.push(trimmed);
    }
  }

  return paragraphTexts.join(" ");
};

/**
 * Counts the slides in a PPTX file.
 *
 * The file is read as a zip archive and every entry whose path matches
 * `ppt/slides/slide<N>.xml` is counted as one slide.
 *
 * @param file - The PPTX file to inspect.
 * @returns The number of matching slide entries in the archive.
 */
export const getPPTSlideCount = async (file: File) => {
  const archive = await new JSZip().loadAsync(await file.arrayBuffer());

  // Pattern identifying slide parts inside the pptx archive
  const slidePathPattern = /^ppt\/slides\/slide\d+\.xml$/;

  let slideCount = 0;
  for (const entryName of Object.keys(archive.files)) {
    if (slidePathPattern.test(entryName)) {
      slideCount += 1;
    }
  }

  return slideCount;
};
