// const fs = require('node:fs/promises');

/**
 * Converts the contents of a transcript file into a plain-text conversation.
 *
 * Despite its name, this function does not touch the file system: the caller
 * passes the already-loaded file contents.
 *
 * @param {string} data - Raw contents of the transcript file.
 * @param {string} extn - File extension without the dot. `'vtt'` is parsed with
 *   `vttToJson`, `'json'` is parsed as JSON and handed to `awsTranscribeToJson`.
 * @returns {Promise<string>} One `speaker: text` line per utterance, joined with
 *   `'\n'`; or the placeholder `'<Unsupported File type>'` for any other
 *   extension (e.g. audio and video files).
 * @throws {Error} Rethrows whatever the parser (or `JSON.parse`) threw, and
 *   throws when the parser reports an invalid file instead of utterances.
 */
export const readFile = async (data, extn) => {
  let processedTranscript;

  try {
    if (extn === 'vtt') {
      processedTranscript = vttToJson(data);
    } else if (extn === 'json') {
      processedTranscript = awsTranscribeToJson(JSON.parse(data));
    } else {
      // Nothing to parse for video and audio files: hand back the placeholder
      // instead of trying to read utterances off a string.
      return '<Unsupported File type>';
    }
  } catch (err) {
    // Keep the log, but surface the real cause rather than failing later with
    // an unrelated "cannot read properties of undefined".
    console.log(err);
    throw err;
  }

  const utterances = processedTranscript ? processedTranscript.parsed_json : undefined;

  if (!Array.isArray(utterances)) {
    // processZoomVtt reports a bad header as `{ error: 'Invalid File!' }`.
    const reason = processedTranscript && processedTranscript.error;
    throw new Error(reason || 'Transcript could not be parsed');
  }

  return utterances.map(converse => `${converse.speaker}: ${converse.text}`).join('\n');
};

/**
 * Parses a WebVTT transcript whose cues carry the speaker as a `Name: text`
 * prefix on the line that follows the timing line.
 *
 * For every line containing `' --> '` the line before it is read as the cue
 * index and the line after it as the cue text. Only that single text line is
 * used. When the text line has no `':'` the speaker is reported as
 * `'Speaker not Identified'`.
 *
 * @param {string|{toString(): string}} inputVtt - Transcript contents; LF or
 *   CRLF line endings are accepted.
 * @returns {{speakers: string[], parsed_json: object[], by_speaker: object}|{error: string}}
 *   `speakers` always starts with `'All'`; `parsed_json` lists cues in file
 *   order as `{index, starttime, endtime, speaker, text}`; `by_speaker` maps a
 *   speaker name to its cues. Returns `{ error: 'Invalid File!' }` when the
 *   first line is not `WEBVTT`.
 */
export const processZoomVtt = inputVtt => {
  // Split on CRLF as well as LF so a Windows-style file does not leave a
  // trailing '\r' on the end time.
  const fileArray = inputVtt.toString().split(/\r?\n/);

  if (fileArray[0].trim() !== 'WEBVTT') {
    return { error: 'Invalid File!' };
  }

  const outputArray = [];
  const speakerSet = new Set(['All']);
  const utterance_by_speaker = {};
  const hasOwn = Object.prototype.hasOwnProperty;

  for (let line = 0; line < fileArray.length; line++) {
    if (!fileArray[line].includes(' --> ')) {
      continue;
    }

    // A timing line can be the very last line of the file; treat the missing
    // text line as empty instead of dereferencing undefined.
    const textLine = line + 1 < fileArray.length ? fileArray[line + 1] : '';

    let speaker;
    let utterance;
    if (textLine.includes(':')) {
      // Only the first ':' separates the speaker; the rest is re-joined below.
      [speaker, ...utterance] = textLine.split(':');
    } else {
      [speaker, ...utterance] = ['Speaker not Identified', textLine];
    }

    const current_speaker = speaker.trim();
    const text = utterance.join(':').trim();
    const index = Number(fileArray[line - 1]);
    const [starttime, endtime] = fileArray[line].split(' --> ');

    outputArray.push({ index, starttime, endtime, speaker: current_speaker, text });
    speakerSet.add(current_speaker);

    // Own-property check so a speaker called e.g. "constructor" does not pick
    // up the inherited Object.prototype member.
    if (!hasOwn.call(utterance_by_speaker, current_speaker)) {
      utterance_by_speaker[current_speaker] = [];
    }

    utterance_by_speaker[current_speaker].push({ text, index, starttime, endtime });
  }

  console.log(`Parsed ${outputArray.length} items`);

  return {
    speakers: [...speakerSet],
    parsed_json: outputArray,
    by_speaker: utterance_by_speaker
  };
};

/**
 * Capitalises each word of a string: the first character of every run that
 * starts with a word character is upper-cased and the rest of that run (up to
 * the next whitespace) is lower-cased.
 *
 * @param {string} dataString - Text to convert.
 * @returns {string} The converted text; anything outside the matched runs is
 *   left untouched.
 */
const toProperCase = dataString => {
  const capitalise = word => {
    const head = word.slice(0, 1).toUpperCase();
    const tail = word.slice(1).toLowerCase();
    return head + tail;
  };

  return dataString.replace(/\w\S*/g, capitalise);
};

/**
 * Parses a WebVTT transcript that marks speakers with voice tags
 * (`<v Speaker Name>text</v>`).
 *
 * When the input contains no `<v` at all it is delegated to `processZoomVtt`.
 * Otherwise every voice tag becomes one utterance; the speaker name is passed
 * through `toProperCase`.
 *
 * A voice tag counts as closed when its `</v>` appears before the next `<v`.
 * An unclosed tag takes the text up to the next blank line, the next `<v`, or
 * the end of the input, whichever comes first.
 *
 * @param {string|{toString(): string}} vtt_value - Transcript contents.
 * @returns {{speakers: string[], parsed_json: object[], by_speaker: object}|{error: string}}
 *   `speakers` always starts with `'All'`; `parsed_json` holds
 *   `{speaker, text, index}` in document order with a 1-based `index`;
 *   `by_speaker` maps each speaker to its `{text, index}` entries. The
 *   `processZoomVtt` result is returned unchanged on the delegated path.
 */
export const vttToJson = vtt_value => {
  const tag_begin_character = '<v';
  const tag_end_character = '>';
  const tag_close_character = '</v>';

  const source = vtt_value.toString();

  if (source.indexOf(tag_begin_character) < 0) {
    return processZoomVtt(source);
  }

  const value_json = {
    speakers: ['All'],
    parsed_json: [],
    by_speaker: {}
  };

  // A blank line terminates a cue; returns the input length when there is none.
  const findCueEnd = from => {
    const ends = [source.indexOf('\n\n', from), source.indexOf('\r\n\r\n', from)].filter(
      position => position >= 0
    );
    return ends.length > 0 ? Math.min(...ends) : source.length;
  };

  let tag_start_from = 0;
  let index = 1;

  for (;;) {
    // indexOf reports "not found" as -1, so a tag at position 0 is a valid hit.
    const tag_start_index = source.indexOf(tag_begin_character, tag_start_from);
    if (tag_start_index < 0) {
      break;
    }

    const tag_close_index = source.indexOf(tag_end_character, tag_start_index);
    if (tag_close_index < 0) {
      // Truncated tag with no '>': nothing sensible left to extract.
      break;
    }

    const text_start_index = tag_close_index + tag_end_character.length;
    const closing_index = source.indexOf(tag_close_character, text_start_index);
    const next_tag_index = source.indexOf(tag_begin_character, text_start_index);
    const is_closed = closing_index >= 0 && (next_tag_index < 0 || closing_index < next_tag_index);

    let text_end_index;
    if (is_closed) {
      text_end_index = closing_index;
      tag_start_from = closing_index + tag_close_character.length;
    } else {
      // No closing tag for this span: stop at the end of the cue (or at the
      // next voice tag) so the scan always moves forward.
      const cue_end_index = findCueEnd(text_start_index);
      text_end_index = next_tag_index >= 0 ? Math.min(cue_end_index, next_tag_index) : cue_end_index;
      tag_start_from = text_end_index;
    }

    const tag_name = toProperCase(
      source.substring(tag_start_index + tag_begin_character.length, tag_close_index).trim()
    );
    const tag_value = source.substring(text_start_index, text_end_index).trim();

    value_json.parsed_json.push({ speaker: tag_name, text: tag_value, index: index });

    if (!value_json.by_speaker[tag_name]) {
      value_json.by_speaker[tag_name] = [];
    }

    value_json.by_speaker[tag_name].push({ text: tag_value, index: index });

    if (!value_json.speakers.includes(tag_name)) {
      value_json.speakers.push(tag_name);
    }

    index = index + 1;
  }

  return value_json;
};

/**
 * Groups the word-level items of a parsed transcription result into
 * per-speaker utterances.
 *
 * Consecutive items with the same `speaker_label` are merged into one
 * utterance. Items of type `'punctuation'` are appended without a leading
 * space, every other item with one. An item that carries no `speaker_label`
 * is attributed to the speaker currently talking.
 *
 * @param {object} input_file - Parsed JSON object exposing `results.items`,
 *   an array whose entries provide `alternatives[0].content` and optionally
 *   `speaker_label`, `type`, `start_time` and `end_time`.
 * @returns {{speakers: string[], parsed_json: object[], by_speaker: object}}
 *   `speakers` always starts with `'All'`; `parsed_json` holds
 *   `{speaker, text, index, starttime, endtime}` with a 1-based `index`;
 *   `by_speaker` maps each speaker label to its utterances.
 */
export const awsTranscribeToJson = input_file => {
  const transcript_items = input_file['results']['items'];

  const utterance_array = [];
  const speakerSet = new Set(['All']);
  const utterance_by_speaker = {};

  let current_speaker = '';
  let current_speaker_text = '';
  let current_start_time = '';
  let current_end_time = '';

  // Records the utterance collected so far, if there is one.
  const flushUtterance = () => {
    if (current_speaker.length === 0 || current_speaker_text.length === 0) {
      return;
    }

    const utterance_index = utterance_array.length + 1;

    utterance_array.push({
      speaker: current_speaker,
      text: current_speaker_text,
      index: utterance_index,
      starttime: current_start_time,
      endtime: current_end_time
    });

    speakerSet.add(current_speaker);

    if (!utterance_by_speaker[current_speaker]) {
      utterance_by_speaker[current_speaker] = [];
    }

    utterance_by_speaker[current_speaker].push({
      text: current_speaker_text,
      index: utterance_index,
      starttime: current_start_time,
      endtime: current_end_time
    });
  };

  for (const item of transcript_items) {
    const label = item['speaker_label'];
    const content = item['alternatives'][0]['content'];

    // An unlabelled item must not reset the speaker, otherwise the running
    // utterance would be split (and the speaker lost) in the middle of a turn.
    const speaker_changed = label != null && label !== current_speaker;

    if (speaker_changed) {
      flushUtterance();

      current_speaker = label;
      current_speaker_text = content;
      current_start_time = item['start_time'];
      current_end_time = item['end_time'];
    } else {
      current_speaker_text =
        item['type'] === 'punctuation'
          ? `${current_speaker_text}${content}`
          : `${current_speaker_text} ${content}`;

      // Items without timing keep the previous end time.
      if (item['end_time']) {
        current_end_time = item['end_time'];
      }
    }
  }

  // The loop only flushes on a speaker change, so the final turn is still
  // pending here.
  flushUtterance();

  return {
    speakers: [...speakerSet],
    parsed_json: utterance_array,
    by_speaker: utterance_by_speaker
  };
};
