import { getLocalUser } from "@/utils/helpers/localstorage";
import toLower from "lodash/toLower";

/**
 * Classification of a single parsed transcript line.
 *
 * `TEXT` marks a spoken line; every other member is assigned by
 * `VoiceBot.parseTranscript` from a bracketed marker in the service payload
 * (for example `[silence]` or `[music]`). The numeric values are spelled out
 * because they are serialised as-is inside the formatted transcript.
 */
enum TranscriptionLineType {
  TEXT = 0,
  SILENCE = 1,
  BLANK = 2,
  PAUSE = 3,
  SOUND = 4,
  INAUDIBLE = 5,
  TYPING_SOUNDS = 6,
  MUSIC = 7,
}

/**
 * One parsed line of a transcript, as produced by `VoiceBot.parseTranscript`.
 * All fields are optional because a line may carry only a marker (no text)
 * or no speaker tag.
 */
type AudioTranscript = {
  // Speaker index taken from a "(speaker N)" tag. 0: local, 1: remote.
  speakerId?: number;
  // Kind of line; TEXT for spoken content, otherwise a non-speech marker.
  type?: TranscriptionLineType;
  // Spoken content. Only set when type is TEXT.
  text?: string;
};

/**
 * One recording session within a call: the parsed lines plus the offsets
 * (in milliseconds, computed as `Date.now() - callStartTime`) at which the
 * session started and ended.
 */
type Transcript = {
  // Lines accumulated for this session.
  transcriptLines?: AudioTranscript[] | undefined;
  // Offset from the start of the call at which recording began (starts at 0).
  elapsedStart?: number;
  // Offset from the start of the call at which recording stopped; unset while recording.
  elapsedEnd?: number | undefined;
};

/**
 * Constructor arguments for `VoiceBot`.
 */
interface IVoiceBot {
  // Local microphone stream; required by startRecording().
  microphoneAudioStream?: MediaStream | undefined;
  // Remote (incoming) audio stream; required by startRecording() and startLiveRecording().
  incomingAudioStream?: MediaStream | undefined;
  // Mode tag; used as the log prefix and compared against "live" when transcribing.
  type: string;
  // Identifier of the call, echoed in every message sent to the service.
  callId: string;
  // Sent to the service as `remoteUsername`.
  remote?: string;
}

/**
 * States reported through the callback registered with
 * `VoiceBot.setOnVoiceBotStateChange`.
 *
 * Within this file only `recordingStart` and `recordingCallTranscriptDone`
 * are emitted; the remaining members are kept for consumers of the enum.
 */
export enum VOICE_BOT_STATE {
  recordingStart = 0,
  recordingStop = 1,
  recordingLiveTranscript = 2,
  recordingCallTranscriptDone = 3,
}

/**
 * Voice-bot endpoint and bearer token, both read once from the runtime
 * configuration object exposed on `window._env_`.
 */
export const {
  VOICE_BOT_URL: baseVoiceBotUrl,
  VOICE_BOT_AUTHORIZATION_TOKEN: voiceBotAuthorizationToken,
} = window._env_;

/**
 * Records call audio in the browser and exchanges it with the voice-bot
 * service over a WebSocket.
 *
 * Two recording flows are offered:
 *  - live (`startLiveRecording` / `stopLiveRecording`): the incoming stream is
 *    recorded in one-second slices; whenever the accumulated audio ends in
 *    silence it is resampled and sent for transcription, then recording
 *    restarts.
 *  - call (`startRecording` / `stopRecording`): microphone and incoming
 *    streams are recorded separately, merged into one stereo buffer when both
 *    recorders have stopped, and sent in one go.
 *
 * Transcription results and the call summary arrive as WebSocket messages and
 * are surfaced through the callbacks registered with `setTranscriptCallback`
 * and `setOnVoiceBotStateChange`.
 */
export default class VoiceBot {
  private static LIVE = "live";
  private static CALL = "call";

  /** Sample rate (Hz) the audio is resampled to before being sent. */
  private static readonly TARGET_SAMPLE_RATE = 16000;
  /** Maximum number of PCM bytes carried by one audio-chunk message. */
  private static readonly AUDIO_CHUNK_BYTES = 32000;

  // Patterns used by parseTranscript, hoisted so they are compiled once.
  private static readonly SPEAKER_PATTERN = /\(speaker (\d)\)/;
  private static readonly SPEAKER_PREFIX_PATTERN = /\(speaker \d\)\s*/;
  private static readonly BRACKET_PATTERN = /\[(.*?)\]/;

  /** Lower-cased bracket markers and the line type each one maps to. */
  private static readonly BRACKET_TAG_TYPES = new Map<
    string,
    TranscriptionLineType
  >([
    ["blank_audio", TranscriptionLineType.BLANK],
    ["silence", TranscriptionLineType.SILENCE],
    ["pause", TranscriptionLineType.PAUSE],
    ["sound", TranscriptionLineType.SOUND],
    ["inaudible", TranscriptionLineType.INAUDIBLE],
    ["typing", TranscriptionLineType.TYPING_SOUNDS],
    ["typing sounds", TranscriptionLineType.TYPING_SOUNDS],
    ["music", TranscriptionLineType.MUSIC],
  ]);

  private TAG!: string;
  private liveMediaRecorder: MediaRecorder | undefined;
  private incomingMediaRecorder: MediaRecorder | undefined;
  private microphoneMediaRecorder: MediaRecorder | undefined;
  // ReturnType<typeof setTimeout> works with both DOM and Node typings.
  private getCallSummaryTimeout: ReturnType<typeof setTimeout> | undefined;
  private liveAudioChunks: Blob[] = [];
  private microphoneAudioBuffer: AudioBuffer | undefined;
  private incomingAudioBuffer: AudioBuffer | undefined;
  private detectedSilence: boolean = false;
  private incomingStream: MediaStream | undefined;
  private microphoneStream: MediaStream | undefined;
  private audioBotLiveActive: boolean = false;
  private callSummary?: string | undefined = undefined;
  private transcripts: Transcript[] = [];
  private callStartTime: number;
  private sipInstance = "urn:uuid:190a0c49-9b74-9f04-f1f4-d7ef095d1cff";
  // True only after "startStreamingTranscription" has actually been sent.
  private streaming: boolean = false;
  private callId: string;
  private remote?: string;
  private newWS: WebSocket | null = null;

  // callbacks
  private onTranscriptCallback: ((text: string) => void) | undefined;
  private onVoiceBotStateChange:
    | ((callId: string, state: VOICE_BOT_STATE) => void)
    | undefined;

  /**
   * Stores the streams and call identifiers, stamps the call start time and
   * opens the WebSocket to the voice-bot service.
   * @param obj Streams, mode tag and call identifiers.
   */
  constructor(obj: IVoiceBot) {
    this.incomingStream = obj.incomingAudioStream;
    this.microphoneStream = obj.microphoneAudioStream;
    this.TAG = obj.type;
    this.callId = obj.callId;
    this.remote = obj.remote;
    this.callStartTime = Date.now();

    console.log(this.TAG, "creating new Voicebot, callId=", this.callId);
    this.initIfNot();
  }

  /**
   * Registers the function invoked with each non-empty live transcription text.
   * @param transcriptCallback Called as `transcriptCallback(text)`.
   */
  public setTranscriptCallback(transcriptCallback: any) {
    this.onTranscriptCallback = transcriptCallback;
  }

  /**
   * This is called just before sending the transcription to NMS. This is the format saved inside the payloadPart of the
   * NmsObject.
   * @returns A JSON format of the transcriptions of the call.
   */
  public getFormattedTranscript() {
    return {
      "content-type": "application/vnd.voicebot+json",
      content: {
        callSummary: this.callSummary,
        transcript: this.transcripts,
      },
    };
  }

  /**
   * Tells whether there is something worth saving: the most recent transcript
   * has at least one line and, unless `ignoreSummary` is set, a call summary
   * has been received.
   * @param ignoreSummary When true the call summary is not required.
   * @returns true when the transcript is usable.
   */
  public isTranscriptValid(ignoreSummary = false): boolean {
    console.log(
      "transcript lines = ",
      this.transcripts.length,
      ", callSummary = ",
      this.callSummary
    );
    const lastTranscript = this.transcripts[this.transcripts.length - 1];
    const hasLines = (lastTranscript?.transcriptLines?.length ?? 0) > 0;
    return hasLines && (ignoreSummary || Boolean(this.callSummary));
  }

  /**
   * Releases everything held by this instance: pending summary timer,
   * recorders, WebSocket, buffers and accumulated transcripts. The WebSocket
   * is only opened by the constructor, so the instance cannot stream again
   * afterwards.
   */
  public clearVoicebot() {
    // A pending summary request would otherwise still fire after the reset.
    clearTimeout(this.getCallSummaryTimeout);
    this.getCallSummaryTimeout = undefined;

    this.newWS?.close();
    this.newWS = null;
    this.streaming = false;
    this.transcripts = [];

    // Stop recorders before dropping them so they stop capturing the streams.
    this.releaseRecorder(this.liveMediaRecorder);
    this.releaseRecorder(this.incomingMediaRecorder);
    this.releaseRecorder(this.microphoneMediaRecorder);
    this.liveMediaRecorder = undefined;
    this.incomingMediaRecorder = undefined;
    this.microphoneMediaRecorder = undefined;

    this.liveAudioChunks = [];
    this.microphoneAudioBuffer = undefined;
    this.incomingAudioBuffer = undefined;
    this.detectedSilence = false;
    this.incomingStream = undefined;
    this.microphoneStream = undefined;
    this.audioBotLiveActive = false;
    this.callSummary = "";
    this.callStartTime = 0;
  }

  /**
   * Registers the function notified of recording state changes.
   * @param pOnCallTranscriptStateChange Called as `(callId, VOICE_BOT_STATE)`.
   */
  public setOnVoiceBotStateChange(pOnCallTranscriptStateChange: any) {
    this.onVoiceBotStateChange = pOnCallTranscriptStateChange;
  }

  /** Detaches the data handler from a recorder and stops it if it is still active. */
  private releaseRecorder(recorder: MediaRecorder | undefined) {
    if (!recorder) return;
    recorder.ondataavailable = null;
    try {
      if (recorder.state !== "inactive") recorder.stop();
    } catch (e) {
      console.error(this.TAG, "Recorder: error stopping media recorder (", e, ")");
    }
  }

  /**
   * Resamples a decoded buffer to `targetSampleRate` by rendering it through
   * an OfflineAudioContext.
   * @param audioBuffer The decoded audio to be resampled.
   * @param targetSampleRate The sample rate required by the service (16 kHz).
   * @returns The resampled audio, with the same number of channels.
   */
  private async resampleAudioBuffer(
    audioBuffer: AudioBuffer,
    targetSampleRate: number
  ): Promise<AudioBuffer> {
    // The frame count must be an integer; truncation is what the implicit
    // conversion performed by the OfflineAudioContext constructor does anyway.
    const length = Math.floor(
      (audioBuffer.length * targetSampleRate) / audioBuffer.sampleRate
    );
    const offlineCtx = new OfflineAudioContext(
      audioBuffer.numberOfChannels,
      length,
      targetSampleRate
    );

    const bufferSource = offlineCtx.createBufferSource();
    bufferSource.buffer = audioBuffer;
    bufferSource.connect(offlineCtx.destination);
    bufferSource.start(0);

    return offlineCtx.startRendering();
  }

  /**
   * Forwards a live transcription text to the transcript callback.
   * Empty or missing text is ignored.
   * @param text The current sentence in text. (transcribed)
   */
  private handleLiveTranscribe(text: string | undefined) {
    if (text && this.onTranscriptCallback) {
      this.onTranscriptCallback(text);
    }
  }

  /**
   * Appends the parsed lines of a call (non-live) transcription to the most
   * recent transcript. Text is ignored when there is no transcript yet, when
   * no summary request is pending, or when the message carried no text.
   * @param text The transcription payload received from the service.
   */
  private handleCallTranscribe(text: string | undefined) {
    if (text !== undefined && this.transcripts.length && this.getCallSummaryTimeout) {
      this.transcripts[this.transcripts.length - 1].transcriptLines?.push(
        ...this.parseTranscript(text)
      );
    } else {
      console.log(this.TAG, "ignore", text);
    }
  }

  /**
   * Stores the call summary and notifies `recordingCallTranscriptDone`.
   * @param summary The summary text returned by the service.
   */
  private handleCallTranscriptDone(summary: string | undefined) {
    console.log(this.TAG, "Recorder: Question's response", summary);
    this.callSummary = summary;
    if (this.onVoiceBotStateChange) {
      this.onVoiceBotStateChange(
        this.callId,
        VOICE_BOT_STATE.recordingCallTranscriptDone
      );
    }
  }

  /**
   * (Re)arms the timer after which the call summary is requested. Each call
   * pushes the deadline back, so the request goes out once messages have
   * stopped arriving for `timeout` milliseconds.
   * @param checkTimeout When true, only re-arm a timer that is already pending.
   * @param timeout Delay in milliseconds.
   */
  public requestCallSummary(checkTimeout = false, timeout = 2000) {
    if (checkTimeout && !this.getCallSummaryTimeout) return;
    clearTimeout(this.getCallSummaryTimeout);
    this.getCallSummaryTimeout = setTimeout(() => {
      this.getCallSummaryAfter();
    }, timeout);
  }

  /**
   * Timer body of `requestCallSummary`: requests the summary, or resets the
   * instance when a summary already exists or there is nothing to summarise.
   */
  private getCallSummaryAfter() {
    if (this.callSummary || !this.isTranscriptValid(true)) {
      console.log(this.TAG, "abort getCallSummaryAfter");
      this.clearVoicebot();
      return;
    }
    this.getCallSummary(this.prepareTranscriptForSummary())
      .then((callSummaryResponse) => {
        clearTimeout(this.getCallSummaryTimeout);
        this.getCallSummaryTimeout = undefined;
        // Undefined when the question went out over the WebSocket; the answer
        // then arrives as a "callSummary" message instead.
        return callSummaryResponse?.json();
      })
      .then((callSummaryJson) => {
        if (callSummaryJson?.response)
          this.handleCallTranscriptDone(callSummaryJson.response);
      })
      .catch((error) => {
        console.error(
          this.TAG,
          "Recorder: failed to get call summary:",
          error
        );
      });
  }

  /**
   * Decodes recorded (encoded) audio into an AudioBuffer.
   * @param blob The recorded audio.
   * @returns The decoded audio.
   */
  public async decodeAudioDataForWAVConversation(
    blob: Blob
  ): Promise<AudioBuffer> {
    const audioCtx = new AudioContext();
    try {
      const arrayBuffer = await blob.arrayBuffer();
      return await audioCtx.decodeAudioData(arrayBuffer);
    } finally {
      // This runs once per second in live mode; without closing, every call
      // would leave a context behind. The decoded buffer stays usable.
      void audioCtx.close().catch(() => undefined);
    }
  }

  /**
   * Converts float samples to interleaved little-endian 16-bit stereo PCM.
   * A mono buffer is written to the second slot of each frame, leaving the
   * first slot silent.
   * @param buffer The audio to convert.
   * @returns Raw PCM bytes, 4 bytes per frame.
   */
  private floatTo16BitPCM(buffer: AudioBuffer) {
    const first = buffer.getChannelData(0);
    const second =
      buffer.numberOfChannels > 1 ? buffer.getChannelData(1) : undefined;
    const bytesPerFrame = 4; // 2 channels x 16 bits
    const output = new ArrayBuffer(first.length * bytesPerFrame);
    const view = new DataView(output);

    const toInt16 = (sample: number) => {
      const clamped = Math.max(-1, Math.min(1, sample));
      return clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
    };

    for (let i = 0; i < first.length; ++i) {
      const frameOffset = i * bytesPerFrame;
      if (second) {
        view.setInt16(frameOffset, toInt16(first[i]), true);
        view.setInt16(frameOffset + 2, toInt16(second[i]), true);
      } else {
        view.setInt16(frameOffset + 2, toInt16(first[i]), true);
      }
    }
    return output;
  }

  /**
   * Resamples the buffer to 16 kHz, converts it to raw PCM and sends it for
   * transcription. Failures are logged, not thrown.
   * @param audioBuffer The decoded AudioBuffer.
   */
  private ResampleAndTranscribe(audioBuffer: AudioBuffer) {
    this.resampleAudioBuffer(audioBuffer, VoiceBot.TARGET_SAMPLE_RATE)
      .then((resampledAudioBuffer: AudioBuffer) => {
        const pcm = this.floatTo16BitPCM(resampledAudioBuffer);
        return this.transcribe(
          new Uint8Array(pcm),
          this.TAG === VoiceBot.LIVE
        );
      })
      .catch((error) => {
        console.error(
          this.TAG,
          "Recorder: Caught error while transcribing:",
          error
        );
      });
  }

  /**
   * Parses a transcription payload into transcript lines, one per input line.
   * A "(speaker N)" tag sets the speaker; a "[marker]" makes the line a
   * non-speech entry (unknown markers count as BLANK); anything else is TEXT
   * with the speaker tag stripped. Empty text lines are dropped.
   * @param text The payload coming from the service.
   * @returns The parsed lines.
   */
  private parseTranscript(text: string): AudioTranscript[] {
    const audioTranscripts: AudioTranscript[] = [];

    for (const line of text.split("\n")) {
      console.log(this.TAG, "Recorder: parsing line :", line);
      const speakerMatch = line.match(VoiceBot.SPEAKER_PATTERN);
      const bracketMatch = line.match(VoiceBot.BRACKET_PATTERN);
      const audioTranscript: AudioTranscript = {};

      if (speakerMatch) {
        audioTranscript.speakerId = parseInt(speakerMatch[1], 10);
      }

      if (bracketMatch) {
        audioTranscript.type =
          VoiceBot.BRACKET_TAG_TYPES.get(toLower(bracketMatch[1])) ??
          TranscriptionLineType.BLANK;
      } else {
        const spoken = speakerMatch
          ? line.replace(VoiceBot.SPEAKER_PREFIX_PATTERN, "")
          : line;
        if (spoken === "") continue; // nothing was said on this line
        audioTranscript.type = TranscriptionLineType.TEXT;
        audioTranscript.text = spoken;
      }

      audioTranscripts.push(audioTranscript);
    }

    return audioTranscripts;
  }

  /**
   * Flattens all TEXT lines of all transcripts into one string, each line
   * prefixed with "Me: " (speaker 0) or "Remote: " (any other speaker).
   * @returns The text sent to the service to be summarised.
   */
  private prepareTranscriptForSummary() {
    return this.transcripts
      .map((it) =>
        it.transcriptLines
          ?.filter((line) => line.type === TranscriptionLineType.TEXT)
          .map((line) =>
            line.speakerId === 0 ? `Me: ${line.text}` : `Remote: ${line.text}`
          )
          .join("\n")
      )
      .join("\n");
  }

  /**
   * Tells whether the first channel of the buffer ENDS with a run of samples
   * below `threshold` that is at least `minSilenceDuration * sampleRate`
   * samples long. Silence earlier in the buffer does not count.
   * @param audioBuffer the accumulated audio.
   * @param threshold absolute sample value below which a sample is silent.
   * @param minSilenceDuration required length of the trailing silence, in seconds.
   * @param sampleRate sample rate used to convert the duration into samples.
   * @returns true if the buffer ends in a long enough silence.
   */
  private detectSilence(
    audioBuffer: AudioBuffer,
    threshold: number,
    minSilenceDuration: number,
    sampleRate: number
  ): boolean {
    const channelData = audioBuffer.getChannelData(0);
    const samplesPerChunk = Math.floor(minSilenceDuration * sampleRate);

    // Only the trailing run matters, so walk backwards and stop early.
    let trailingSilentSamples = 0;
    for (let i = channelData.length - 1; i >= 0; i--) {
      if (!(Math.abs(channelData[i]) < threshold)) break;
      trailingSilentSamples++;
      if (trailingSilentSamples >= samplesPerChunk) return true;
    }
    return false;
  }

  /** Drops the accumulated live chunks and restarts recording if live mode is still on. */
  private restartLiveRecording() {
    this.liveAudioChunks = [];
    if (this.audioBotLiveActive) {
      this.startLiveRecording();
    }
  }

  /**
   * Callback when recording from microphone. This is used for the total call transcription, not live.
   * Data is only processed once the recorder has stopped.
   * @param ev The blob event containing the recorded audio
   */
  private onCallMicrophoneRecorderDataAvailable(ev: BlobEvent) {
    console.log(
      this.TAG,
      "Recorder: received final data for microphone audio buffer"
    );
    if (this.microphoneMediaRecorder?.state === "inactive") {
      this.decodeCallTrack(ev.data, "microphone");
    }
  }

  /**
   * Callback when recording from the incoming audio source. This is used for the total call transcription, not live.
   * Data is only processed once the recorder has stopped.
   * @param ev The blob event containing the recorded audio
   */
  private onCallIncomingRecorderDataAvailable(ev: BlobEvent) {
    console.log(
      this.TAG,
      "Recorder: received final data for incoming audio buffer"
    );
    if (this.incomingMediaRecorder?.state === "inactive") {
      this.decodeCallTrack(ev.data, "incoming");
    }
  }

  /**
   * Decodes one finished call track, stores it, and once both tracks are
   * available sends the combined stereo audio for transcription.
   * @param data The recorded audio of the track.
   * @param source Which track the data belongs to.
   */
  private decodeCallTrack(data: Blob, source: "microphone" | "incoming") {
    this.decodeAudioDataForWAVConversation(data)
      .then((audioBuffer: AudioBuffer) => {
        console.log(this.TAG, `Recorder: decoded ${source} audio buffer`);
        if (source === "microphone") {
          this.microphoneAudioBuffer = audioBuffer;
        } else {
          this.incomingAudioBuffer = audioBuffer;
        }
        const combinedAudioBuffer = this.combineToStereo();
        if (combinedAudioBuffer) {
          this.sendStart();
          this.ResampleAndTranscribe(combinedAudioBuffer);
        }
      })
      .catch((error) => {
        console.error(
          this.TAG,
          `Recorder: failed to decode ${source} audio buffer:`,
          error
        );
      });
  }

  /**
   * Merges the microphone track (channel 0) and the incoming track
   * (channel 1) into one stereo buffer with the length and sample rate of
   * the incoming track, then clears both stored tracks.
   *
   * The microphone track is aligned on the END of the incoming one: when it
   * is longer its head is dropped, when it is shorter the gap at the start is
   * left silent.
   * @returns The stereo buffer, or undefined until both tracks are decoded.
   */
  private combineToStereo() {
    console.log(
      this.TAG,
      `Recorder: incomingAudioBuffer=${this.incomingAudioBuffer} && microphoneAudioBuffer=${this.microphoneAudioBuffer}`
    );
    const incoming = this.incomingAudioBuffer;
    const microphone = this.microphoneAudioBuffer;
    if (!incoming || !microphone) return undefined;

    console.log(
      this.TAG,
      "Recorder: both audioStream are ready to be combined"
    );
    const length = incoming.length;

    const audioCtx = new AudioContext();
    let stereoBuffer: AudioBuffer;
    try {
      stereoBuffer = audioCtx.createBuffer(2, length, incoming.sampleRate);
    } finally {
      // The context is only needed as a buffer factory.
      void audioCtx.close().catch(() => undefined);
    }

    // Keep at most the last `length` microphone samples. subarray(begin, end)
    // takes absolute indices, so the start must never go negative and the
    // end must be the end of the microphone data, not `length`.
    const microphoneData = microphone.getChannelData(0);
    const tailStart = Math.max(0, microphoneData.length - length);
    const microphoneTail = microphoneData.subarray(tailStart);

    stereoBuffer
      .getChannelData(0)
      .set(microphoneTail, length - microphoneTail.length);
    stereoBuffer.getChannelData(1).set(incoming.getChannelData(0));

    this.incomingAudioBuffer = undefined;
    this.microphoneAudioBuffer = undefined;
    return stereoBuffer;
  }

  /**
   * Callback of the live recorder. While recording, every slice is appended
   * and the accumulated audio is checked for trailing silence; the first time
   * silence shows up the recorder is stopped. Once stopped, the accumulated
   * audio is sent for transcription and recording restarts.
   * @param ev The blob event containing the recorded slice
   */
  private onLiveRecorderDataAvailable(ev: BlobEvent) {
    if (!this.audioBotLiveActive) {
      return;
    }
    const recorderState = this.liveMediaRecorder?.state;
    console.log("onLiveRecorderDataAvailable", recorderState);
    const recorderStopped = recorderState === "inactive";
    console.log(
      this.TAG,
      recorderStopped
        ? "Recorder: done recording, converting to wav..."
        : "Recorder: requesting data, analyzing for silence..."
    );

    this.liveAudioChunks.push(ev.data);
    const combinedBlob = new Blob(this.liveAudioChunks, {
      type: "audio/webm;codecs=opus",
    });
    const decoding = this.decodeAudioDataForWAVConversation(combinedBlob);

    if (recorderStopped) {
      decoding
        .then((audioBuffer) => {
          this.ResampleAndTranscribe(audioBuffer);
        })
        .finally(() => {
          console.info(this.TAG, "Recorder: clearing accumulated audio chunks");
          this.restartLiveRecording();
        })
        .catch((error) =>
          console.error(
            this.TAG,
            "Recorder: failed to convert to WAV with error:",
            error
          )
        );
      return;
    }

    decoding
      .then((audioBuffer: AudioBuffer) => {
        // NOTE(review): 16000 is passed as the sample rate although the
        // decoded buffer carries its own `sampleRate`; if that differs, the
        // 0.25 s window covers a different duration — confirm the intent.
        if (this.detectSilence(audioBuffer, 0.01, 0.25, 16000)) {
          if (!this.detectedSilence) this.stopLiveRecording(false);
          else this.sendPing(); //keep alive
          this.detectedSilence = true;
        } else {
          this.sendPing(); //keep alive
          this.detectedSilence = false;
        }
      })
      .catch((error) =>
        console.error(
          this.TAG,
          "failed to convert to WAV with error: ",
          error
        )
      );
  }

  /**
   * Starts (or restarts) live recording of the incoming audio stream in
   * one-second slices and announces the streaming session to the service.
   * @returns true if the recorder is running, false when the incoming stream
   * is missing or the recorder could not be started.
   */
  public startLiveRecording(): boolean {
    const lIncomingStream = this.incomingStream;
    try {
      if (lIncomingStream != null) {
        if (!this.liveMediaRecorder) {
          this.detectedSilence = false;
          this.liveAudioChunks = [];
          this.liveMediaRecorder = new MediaRecorder(lIncomingStream);
          this.liveMediaRecorder.ondataavailable =
            this.onLiveRecorderDataAvailable.bind(this);
        }
        if (this.liveMediaRecorder.state === "inactive") {
          console.log(this.TAG, "Recorder: starting live recording");
          this.liveMediaRecorder.start(1000);
          this.audioBotLiveActive = true;
        }
        // No-op once the session has been announced; retried on every
        // restart until the socket is open.
        this.sendStart();
        return true;
      }
      console.error(this.TAG, "Audio Stream not set");
      return false;
    } catch (e) {
      console.error(
        this.TAG,
        "Recorder: error starting media recorder (",
        e,
        ")"
      );
      this.releaseRecorder(this.liveMediaRecorder);
      this.liveMediaRecorder = undefined;
      return false;
    }
  }

  /**
   * Stops the live recorder, which also triggers a data available callback.
   * @param stop true (default) when the user turns live mode off: the
   * streaming session is closed and the recorder dropped. false when only
   * the current segment ends because silence was detected.
   */
  public stopLiveRecording(stop = true) {
    console.log(
      this.TAG,
      stop
        ? "Recorder: mediaRecorder state" + this.liveMediaRecorder?.state
        : "Recorder: detected new silence, transcribing!!!!"
    );
    if (this.liveMediaRecorder?.state === "recording") {
      console.log(this.TAG, "Recorder: stopping live recording");
      this.liveMediaRecorder.stop();
    }
    if (stop) {
      this.sendStop();
      this.liveMediaRecorder = undefined;
      this.audioBotLiveActive = false;
    }
  }

  /**
   * Starts recording the microphone and incoming streams for a whole-call
   * transcription, opens a new transcript entry and notifies
   * `recordingStart`.
   * @returns true if both recorders were started; false when a stream is
   * missing, a recorder is already active, or starting failed.
   */
  public startRecording() {
    const lMicrophoneStream = this.microphoneStream;
    const lIncomingStream = this.incomingStream;
    try {
      console.log(
        this.TAG,
        `Recorder: lMicrophoneStream: ${lMicrophoneStream}, lIncomingStream: ${lIncomingStream}`
      );
      if (lMicrophoneStream != null && lIncomingStream != null) {
        if (!this.incomingMediaRecorder) {
          this.incomingMediaRecorder = new MediaRecorder(lIncomingStream, {
            audioBitsPerSecond: 16000,
            mimeType: "audio/webm;codecs=opus",
          });
          this.incomingMediaRecorder.ondataavailable = (ev) => {
            this.onCallIncomingRecorderDataAvailable(ev);
          };
        }

        if (!this.microphoneMediaRecorder) {
          this.microphoneMediaRecorder = new MediaRecorder(lMicrophoneStream, {
            audioBitsPerSecond: 16000,
            mimeType: "audio/webm;codecs=opus",
          });
          this.microphoneMediaRecorder.ondataavailable = (ev) => {
            this.onCallMicrophoneRecorderDataAvailable(ev);
          };
        }

        if (
          this.incomingMediaRecorder.state === "inactive" &&
          this.microphoneMediaRecorder.state === "inactive"
        ) {
          this.incomingMediaRecorder.start();
          this.microphoneMediaRecorder.start();
          this.newTranscript();
          if (this.onVoiceBotStateChange) {
            this.onVoiceBotStateChange(
              this.callId,
              VOICE_BOT_STATE.recordingStart
            );
          }
          return true;
        }
      }
      return false;
    } catch (e) {
      console.error(
        this.TAG,
        "Recorder: error starting media recorder (",
        e,
        ")"
      );
      this.releaseRecorder(this.incomingMediaRecorder);
      this.incomingMediaRecorder = undefined;
      this.releaseRecorder(this.microphoneMediaRecorder);
      this.microphoneMediaRecorder = undefined;
      return false;
    }
  }

  /**
   * Stops both call recorders, stamps the end offset on the current
   * transcript and notifies the state callback.
   */
  public stopRecording() {
    if (this.incomingMediaRecorder?.state === "recording") {
      console.log(
        this.TAG,
        "Recorder: Stopping recording (incomingMediaRecorder)"
      );
      this.incomingMediaRecorder.stop();
    }
    if (this.microphoneMediaRecorder?.state === "recording") {
      console.log(
        this.TAG,
        "Recorder: Stopping recording (microphoneMediaRecorder)"
      );
      this.microphoneMediaRecorder.stop();
    }

    // If there is no last transcript, we are in a bad state.
    const lastTranscript = this.transcripts[this.transcripts.length - 1];
    if (lastTranscript) {
      lastTranscript.elapsedEnd = Date.now() - this.callStartTime;
      console.log(
        this.TAG,
        "stopRecording: setting elpasedEnd to:",
        lastTranscript.elapsedEnd
      );
    }

    // NOTE(review): this reports recordingCallTranscriptDone although
    // VOICE_BOT_STATE.recordingStop exists and is never emitted — confirm
    // which state listeners expect here.
    if (this.onVoiceBotStateChange) {
      this.onVoiceBotStateChange(
        this.callId,
        VOICE_BOT_STATE.recordingCallTranscriptDone
      );
    }
  }

  /**
   * Opens the WebSocket to the voice-bot service (once) and wires its
   * handlers. The socket URL is the base URL with the token as query
   * parameter and, when the base URL has an explicit port, that port plus one.
   */
  private initIfNot() {
    if (this.newWS) return;

    const msisdn = getLocalUser();
    const sipInstance = this.sipInstance;
    const url = new URL(
      baseVoiceBotUrl +
        "?token=" +
        encodeURIComponent(voiceBotAuthorizationToken)
    );
    // URL.port is "" (never null) when no explicit port is present, so a
    // `?? "3000"` fallback can never apply; such URLs keep their default port.
    // NOTE(review): if 3000 was meant as the fallback base port, this needs a
    // deliberate change — confirm against the deployment.
    if (url.port !== "") {
      url.port = (parseInt(url.port, 10) + 1).toString();
    }
    // Log without the query string: it carries the authorization token.
    console.log(url.origin + url.pathname);

    const ws = new WebSocket(url.href);
    this.newWS = ws;

    ws.onopen = () => {
      this.newWS?.send(
        JSON.stringify({ type: "connect", values: { msisdn, sipInstance } })
      );
    };
    ws.onmessage = (evt: MessageEvent) => {
      console.log("message", evt.data);
      let json: {
        type?: string;
        values?: { text?: string; response?: string };
      } | null;
      try {
        json = JSON.parse(evt.data);
      } catch (error) {
        // One malformed frame must not break the handler.
        console.error(this.TAG, "ignoring malformed message:", error);
        return;
      }
      switch (json?.type) {
        case "livePartial":
          this.requestCallSummary(true);
          break;
        case "callSummary":
          this.handleCallTranscriptDone(json.values?.response);
          break;
        case "liveChunk":
          this.handleLiveTranscribe(json.values?.text);
          break;
        case "callTranscription":
          this.requestCallSummary(true);
          this.handleCallTranscribe(json.values?.text);
          break;
        default:
          console.log("ignore", json); //ignore e.g. pong
          break;
      }
    };
    ws.onclose = () => {
      console.error("WebSocket close");
      if (this.newWS === ws) this.newWS = null;
    };
    ws.onerror = (error) => {
      console.error("WebSocket error:", error);
      if (this.newWS === ws) this.newWS = null;
    };
  }

  /** Sends a keep-alive ping when the socket is open. */
  private sendPing() {
    if (this.newWS && this.newWS.readyState === WebSocket.OPEN)
      this.newWS.send(JSON.stringify({ type: "ping", values: {} }));
  }

  /**
   * Announces a streaming session to the service. Does nothing when a
   * session is already announced. When the socket is not open yet nothing is
   * sent and the session stays un-announced, so a later call can retry;
   * marking it as streaming in that case would suppress the start message
   * for good.
   */
  private sendStart() {
    if (this.streaming) return;
    const ws = this.newWS;
    if (!ws || ws.readyState !== WebSocket.OPEN) return;

    ws.send(
      JSON.stringify({
        type: "startStreamingTranscription",
        values: {
          msisdn: getLocalUser(),
          sipInstance: this.sipInstance,
          callId: this.callId,
          remoteUsername: this.remote,
          elapsedStart: Date.now() - this.callStartTime,
          fraudDetectionEnabled: false,
          aiDetectionEnabled: true,
          callRecordingEnabled: true,
          callSummaryEnabled: true,
        },
      })
    );
    this.streaming = true;
  }

  /**
   * Closes the streaming session: tells the service when a session is open
   * and the socket is usable, and in every case marks the session as closed.
   */
  private sendStop() {
    const ws = this.newWS;
    if (this.streaming && ws && ws.readyState === WebSocket.OPEN) {
      ws.send(
        JSON.stringify({
          type: "stopStreamingTranscription",
          values: {
            msisdn: getLocalUser(),
            sipInstance: this.sipInstance,
            callId: this.callId,
            remoteUsername: this.remote,
            // NOTE(review): always 0, unlike elapsedStart which is measured
            // from the call start — confirm what the service expects.
            elapsedEnd: 0,
          },
        })
      );
    }
    this.streaming = false;
  }

  //// calls interacting with the voice bot

  /** Opens a new, empty transcript entry stamped with the current call offset. */
  private newTranscript() {
    this.transcripts.push({
      transcriptLines: [],
      elapsedStart: Date.now() - this.callStartTime,
    });
  }

  /**
   * Sends PCM audio to the service as a sequence of JSON messages of at most
   * 32000 bytes each. Nothing is sent unless a streaming session is open and
   * the socket is usable.
   * @param blob The raw PCM bytes.
   * @param isLiveTranscription Currently unused; kept for callers.
   * @param start Byte offset at which to start sending.
   */
  private transcribe(
    blob: Uint8Array,
    isLiveTranscription: boolean,
    start = 0
  ) {
    const ws = this.newWS;
    if (!this.streaming || !ws || ws.readyState !== WebSocket.OPEN) return;

    const msisdn = getLocalUser();
    const callId = this.callId;
    for (let i = start; i < blob.length; i += VoiceBot.AUDIO_CHUNK_BYTES) {
      // Convert one window at a time rather than the whole recording at once.
      const audioChunk = Array.from(
        blob.subarray(i, i + VoiceBot.AUDIO_CHUNK_BYTES)
      );
      ws.send(
        JSON.stringify({
          type: "onTranscriptionAudioChunk",
          values: { msisdn, callId, audioChunk },
        })
      );
    }
  }

  /**
   * Asks the service to summarise the call. Over an open WebSocket the
   * streaming session is closed and a "question" message is sent; the answer
   * then arrives as a "callSummary" message. Otherwise the question is
   * POSTed to `/question`.
   * @param transcriptLines All the lines that were previously transcribed.
   * @returns The HTTP response when the POST fallback was used, undefined
   * when the question went out over the WebSocket.
   */
  private async getCallSummary(
    transcriptLines: string
  ): Promise<Response | undefined> {
    console.log(this.TAG, "Recorder: transcribing");
    const question = {
      server: "llama3",
      text: transcriptLines,
      quest: "what is this conversation about?",
      msisdn: getLocalUser(),
      callId: this.callId,
      remoteUsername: this.remote,
      sipInstance: this.sipInstance,
    };

    const ws = this.newWS;
    if (ws && ws.readyState === WebSocket.OPEN) {
      this.sendStop();
      ws.send(JSON.stringify({ ...question, type: "question" }));
      return undefined;
    }

    //CORS
    return fetch(new URL("/question", baseVoiceBotUrl), {
      method: "POST",
      body: JSON.stringify(question),
      headers: {
        Authorization: "Bearer " + voiceBotAuthorizationToken,
      },
    });
  }
}
