import type { Tiktoken } from '@dqbd/tiktoken'

import { config } from 'config'
import { shouldUsePublishedVersion } from 'utils/publishing'
/**
 * Shared tokenizer instance. It stays `undefined` until the WASM module has
 * been downloaded and initialised, and remains `undefined` when loading is
 * skipped or fails, so every consumer must handle the "not loaded" case.
 */
let tokenizer: Tiktoken | undefined

// Only load the tokenizer in a real browser session: skip when there is no
// `window`, during schema builds, under Cypress, and for the published version.
if (
  typeof window !== 'undefined' &&
  process.env.SCHEMA_BUILD !== 'true' &&
  !window['Cypress'] &&
  !shouldUsePublishedVersion()
) {
  // This is manually uploaded to our CDN to avoid Vercel overages for downloading it over and over again.
  // See https://linear.app/gamma-app/issue/G-4631/host-tiktoken-wasm-on-our-own-cdn-to-avoid-vercel-bandwidth-costs
  // NB: Don't forget to manually update it in staging (cdn-staging.gamma.app) and production (cdn.gamma.app) buckets!
  const host = config.ASSETS_CDN_HOST || 'cdn.gamma.app'
  import('@dqbd/tiktoken/init')
    .then(({ get_encoding, init }) =>
      // Return the chain so that a single `.catch` below covers the dynamic
      // import, the download and the WASM initialisation alike.
      fetch(`https://${host}/_app_static/js/tiktoken-640a99ed3ce5ba67.wasm`)
        .then((response) => {
          // `fetch` resolves on HTTP errors too; without this check an error
          // page body would be handed to `WebAssembly.instantiate`.
          if (!response.ok) {
            throw new Error(
              `Unexpected HTTP status ${response.status} while fetching tokenizer wasm`
            )
          }
          return response.arrayBuffer()
        })
        .then((wasm) =>
          init((imports) => WebAssembly.instantiate(wasm, imports))
        )
        .then(() => {
          tokenizer = get_encoding('cl100k_base')
        })
    )
    .catch((error) => {
      // Best effort: consumers fall back to their "not loaded" behaviour.
      console.warn('Failed to load tokenizer', error)
    })
}

// Rough tokens-per-word ratio. Not referenced by the code visible in this file;
// presumably used by callers that estimate from word counts — TODO confirm.
export const APPROX_TOKENS_PER_WORD = 1.33
// Characters-per-token ratio that getTokenLength divides by when it has to
// estimate a token count without a loaded tokenizer.
export const CHARACTERS_PER_TOKEN = 3.75

/**
 * Returns the number of tokens in `message`.
 *
 * A missing or empty message counts as 0. While the tokenizer has not loaded,
 * the result is an estimate derived from the character count.
 */
export const getTokenLength = (message?: string) => {
  if (!message) return 0

  return tokenizer
    ? tokenizer.encode(message).length
    : // Tokenizer not available yet: approximate from the character count.
      Math.ceil(message.length / CHARACTERS_PER_TOKEN)
}

/**
 * Splits `input` into the decoded text of each individual token.
 *
 * Returns an empty array while the tokenizer has not loaded.
 *
 * Beware using this on foreign-language text: every token is decoded on its
 * own, so a Unicode character whose bytes span several tokens gets split.
 * Inspired by https://github.com/dqbd/tiktokenizer/blob/master/src/utils/segments.ts
 */
export function getStringTokensEnglish(input: string): string[] {
  // Capture in a const so the narrowing survives inside the mapping callback.
  const activeTokenizer = tokenizer
  if (!activeTokenizer) return []

  const tokenIds = activeTokenizer.encode(input)
  const utf8 = new TextDecoder()
  return Array.from(tokenIds, (tokenId) => {
    const rawBytes = activeTokenizer.decode_single_token_bytes(tokenId)
    return utf8.decode(new Uint8Array(rawBytes))
  })
}

/**
 * Encodes `input` into token ids using the shared tokenizer.
 *
 * Returns an empty `Uint32Array` while the tokenizer has not loaded.
 */
export function stringToTokens(input: string): Uint32Array {
  return tokenizer ? tokenizer.encode(input) : new Uint32Array()
}

/**
 * Decodes token ids back into text using the shared tokenizer.
 *
 * Returns an empty string while the tokenizer has not loaded.
 */
export function tokensToString(tokens: Uint32Array): string {
  if (tokenizer) {
    // The tokenizer yields raw bytes; turn them into a JS string.
    const utf8Bytes = tokenizer.decode(tokens)
    return new TextDecoder().decode(utf8Bytes)
  }
  return ''
}

/**
 * Truncates `input` so that it spans at most `maxTokens` tokens.
 *
 * - A non-positive or NaN `maxTokens` yields an empty string.
 * - Input that already fits is returned unchanged.
 * - While the tokenizer has not loaded, the cut is approximated from the
 *   character count using CHARACTERS_PER_TOKEN (the same ratio getTokenLength
 *   falls back to) instead of discarding the whole input.
 *
 * @param input Text to truncate.
 * @param maxTokens Maximum number of tokens to keep.
 * @returns A prefix of `input` within the token budget.
 */
export function capTokens(input: string, maxTokens: number): string {
  // Written as a negated comparison so that NaN is rejected as well. A negative
  // value would otherwise reach `slice(0, -k)` and drop tokens from the end.
  if (!(maxTokens > 0)) return ''

  if (!tokenizer) {
    const maxChars = Math.floor(maxTokens * CHARACTERS_PER_TOKEN)
    if (input.length <= maxChars) return input

    const approximate = input.slice(0, maxChars)
    // Do not end on the first half of a surrogate pair.
    const lastUnit = approximate.charCodeAt(approximate.length - 1)
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff
    return endsOnHighSurrogate ? approximate.slice(0, -1) : approximate
  }

  const tokens = stringToTokens(input)
  if (tokens.length <= maxTokens) return input

  let capped = tokensToString(tokens.slice(0, maxTokens))
  // Cutting between tokens can split a multi-byte character, which the decoder
  // renders as a trailing U+FFFD. Drop it unless the input really has that
  // character at this position.
  while (capped.endsWith('\uFFFD') && !input.startsWith(capped)) {
    capped = capped.slice(0, -1)
  }
  return capped
}
