// Based on https://www.biblegateway.com/public/link-to-us/tooltips/bglinks.js
// This is the Bible reference parsing part extracted from upstream searchNode, which mixed too many responsibilities.
// Changes:
// - Fix so that Luke 23:54-24:1 parses correctly instead of as ["Luke 23:54-24", "1"]

// Used by: bible_gateway_links.js and BibleRef.svelte
// Tested by: app/javascript/@svelte/test/BibleRef.svelte

/**
 * One individual Bible reference, as yielded by `parse_bible_ref`.
 *
 * Example: for the text "Hebrews 8:1-6; 9:11-28" the second yielded ref has
 * book "Hebrews", chapter "9", verse "11-28", reference "Hebrews 9:11-28" and matched_text "9:11-28".
 */
export interface BibleRef {
  /** Book name or abbreviation as it was matched in the text (carried over from the previous ref when this ref omits it). */
  book: string
  /** Chapter digits, without the ":" (carried over from the previous ref when this ref omits it). */
  chapter: string
  /** Verse part of the ref; may be a single verse or a range such as "11-28". */
  verse: string
  /**
   * Text separating this ref from the next one, e.g. ";" or "," — captured together with any single
   * whitespace character matched on either side of it. `parse_bible_ref` sets it to '' when no separator follows.
   */
  separator?: string
  /** Full reference built as book + " " + chapter + ":" + verse, e.g. "Hebrews 9:11-28". */
  reference: string
  /** The text matched for this ref alone, e.g. "9:11-28". May equal `reference`, or be partial (book and/or chapter missing). */
  matched_text: string
  /** The outer matched text that was split into individual refs, e.g. "Hebrews 8:1-6; 9:11-28". */
  verse_match_text: string
}

/**
 * Regular-expression source matching a Bible book name or one of its abbreviations.
 *
 * This is a bare alternation ("A|B|C") with no anchors and no enclosing group, so it must be wrapped in a
 * group before being combined with other pattern fragments (parse_bible_ref wraps it in a capture group
 * and compiles it with the `i` flag).
 * Numbered books accept the prefix "1"/"i"/"2"/"ii" (plus "3"/"iii" for the John alternative), followed by
 * an optional space.
 */
export const book_regex_str =
  'Genesis|Gen?|Gn|Exodus|Exod?|Ex|Leviticus|Le?v|Numbers|Nu?m|Nu|Deuteronomy|Deut?|Dt|Josh?ua|Josh?|Jsh|Judges|Ju?dg|Jg|Ru(?:th)?|Ru?t|(?:1|i|2|ii) ?Samuel|(?:1|i|2|ii) ?S(?:a|m)|(?:1|i|2|ii) ?Sam|(?:1|i|2|ii) ?Kin(?:gs?)?|(?:1|i|2|ii) ?Kgs|(?:1|i|2|ii) ?Chronicles|(?:1|i|2|ii) ?Chr(?:o?n)?|(?:1|i|2|ii) ?Cr|Ezra?|Nehemiah|Neh?|Esther|Esth?|Jo?b|Psalms?|Psa?|Proverbs|Pro?v?|Ecclesiastes|Ec(?:cl?)?|Song (?:O|o)f Solomon|Song (?:O|o)f Songs?|Son(?:gs?)?|SS|Isaiah?|Isa?|Jeremiah|Je?r|Lamentations|La(?:me?)?|Ezekiel|Eze?k?|Daniel|Da?n|Da|Hosea|Hos?|Hs|Jo(?:el?)?|Am(?:os?)?|Obadiah|Ob(?:ad?)?|Jon(?:ah?)?|Jnh|Mic(?:ah?)?|Mi|Nah?um|Nah?|Habakkuk|Hab|Zephaniah|Ze?ph?|Haggai|Hagg?|Hg|Zechariah|Ze?ch?|Malachi|Ma?l|Matthew|Matt?|Mt|Mark|Ma(?:r|k)|M(?:r|k)|Luke?|Lk|Lu?c|John|Jn|Ac(?:ts?)?|Romans|Ro?m|(?:1|i|2|ii) ?Corinthians|(?:1|i|2|ii) ?C(?:or?)?|Galatians|Gal?|Gl|Ephesians|Eph?|Philippians|Phil|Colossians|Co?l|(?:1|i|2|ii) ?Thessalonians|(?:1|i|2|ii) ?Th(?:e(?:ss?)?)?|(?:1|i|2|ii) ?Timothy|(?:1|i|2|ii) ?Tim|(?:1|i|2|ii) ?T(?:i|m)|Ti(?:tus)?|Ti?t|Philemon|Phl?m|Hebrews|Heb?|Jam(?:es)?|Jms|Jas|(?:1|i|2|ii) ?Peter|(?:1|i|2|ii) ?Pe?t?|(?:1|i|2|ii|3|iii) ?J(?:oh)?n?|Jude?|Revelations?|Rev|R(?:e|v)'

/**
 * Finds the first list of Bible references in `text` and yields each individual reference in it.
 *
 * Given text containing
 *   Hebrews 8:1-6; 9:11-28
 * yields one ref for "Hebrews 8:1-6" and one for "9:11-28". A ref that omits its book and/or chapter
 * inherits them from the preceding ref in the same list, so the second ref's `reference` is "Hebrews 9:11-28".
 *
 * Only the first reference list found in `text` is parsed (the list regex is not global).
 *
 * @param text - Text to search for a reference list.
 * @returns An [iterator](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators)
 *   over the parsed refs; it yields nothing when `text` contains no reference.
 */
export const parse_bible_ref = function* (text: string): Generator<BibleRef, void, undefined> {
  // Space characters accepted between a book and its chapter (this includes regular space, \u0020).
  // NOTE(review): \s is used in other fragments below; the two classes are not interchangeable, so they are left as found.
  const space = `[\\u0020\\u00a0\\u1680\\u2000-\\u200a\\u2028-\\u202f\\u205f\\u3000]`
  const dash = '(?:-|–|—)'
  // Book, optionally followed by ONE non-digit character (e.g. the "." in "Gen." or the space after the book).
  // This used to be "any character", which greedily swallowed the first digit of the chapter when no space
  // followed the book: "Ps23:1" parsed as chapter "3" and "Jn3:16" as an empty chapter.
  // The class is "whatever . matches, minus digits", so every other input matches exactly as before.
  const book_ = `(${book_regex_str})[^\\d\\n\\r\\u2028\\u2029]?`
  const opt_book = `(?:${book_}${space}*)?`
  const chapter_ = `(?:(\\d*):)` // chapter + ":"
  const separator = `(\\s?(?:,|;|&amp;|&|and|cf\\.|cf)\\s?)`

  // Finds a list (1 or more) of references, separated by "&", "and", ";", etc.
  // The 1st reference requires all of book + chapter + ":" + verse; the following ones only require a number.
  // Example: Hebrews 8:1-6; 9:11-28
  const ref_list_regex_str =
    book_ + // "Hebrews"
    `${space}*` +
    chapter_ + // "8:"
    `\\d+(?:ff|f|\\w)?` + // "1"
    `(?:` + // zero or more continuations: "-6", "; 9:11-28"
    `(?:` +
    `(?:${dash}\\s?${opt_book})` + // range dash, optionally followed by a book
    `|` +
    separator + // ";"
    `)` +
    `\\s?` +
    // "6", "9" — chapter/verse, optionally prefixed by a "v."-style marker.
    // NOTE(review): the dots in "vv." etc. are unescaped and therefore match any character; left unchanged
    // so that inputs which match today keep matching.
    `(?:(?:(?:vv.|vs.|vss.|v.) ?)?\\d+\\w?)` +
    `(?::\\d+\\w?)?` + // ":11" — :verse (optional)
    `)*`
  const ref_list_regex = new RegExp(ref_list_regex_str, 'i')

  // Takes the text matched by ref_list_regex (a list of references) and finds each _individual_ reference
  // within it (+ optional separator), capturing the book, chapter and verse of that reference.
  // Example: "Hebrews 8:1-6;"
  const book_chap_regex_str =
    `(` + // matched_text: match[1] ("Hebrews 8:1-6") (without separator)
    opt_book + // book: match[2] "Hebrews" (optional: the book need not be repeated within a list)
    `${chapter_}?` + // chapter: match[3] "8" (optional: a bare verse has no ":" in it)
    `(` + // verse: match[4] "1-6"
    `\\d+` + // "1"
    `(?:` +
    `(?:ff|f|\\w)` + // "1ff"
    `|` +
    `(?:\\s?${dash}\\s?)?` + // "-"
    `(?:\\d+)?` + // "1-6" — verse (99% of time) or chapter
    // ":1" / ":12" — verse after an end chapter, as in "Luke 23:54-24:12". This must be greedy: everything
    // after it is optional, so a lazy \d+? stops after one digit and splits "24:12" into "24:1" and a stray "2".
    `(?::\\d+)?` +
    `)` +
    `)` +
    `)` +
    `${separator}?` // separator: match[5] ";" (optional)
  // Global (stateful) regex; it is created per call so each generator instance has its own lastIndex.
  const book_chap_regex = new RegExp(book_chap_regex_str, 'gi')

  const list_match = text.match(ref_list_regex)
  if (list_match === null) {
    return
  }
  const verse_match_text = list_match[0]

  // Book and chapter carry over from one ref to the next. They start as '' so that the literal text
  // "undefined" can never end up inside a reference.
  let book = ''
  let chapter = ''
  let match: RegExpExecArray | null

  // Break up what may be multiple references. Every match consumes at least one digit, so exec() always advances.
  while ((match = book_chap_regex.exec(verse_match_text)) !== null) {
    const [, matched_text = '', book_part, chapter_part, verse = '', ref_separator = ''] = match
    if (book_part) {
      book = book_part
    }
    if (chapter_part) {
      chapter = chapter_part
    }
    yield {
      book,
      chapter,
      verse,
      separator: ref_separator,
      reference: `${book} ${chapter}:${verse}`,
      matched_text,
      verse_match_text,
    }
  }
}

export default parse_bible_ref
