angular.module('LeasePilot').service('leaseProcessorService', function() {
  /**
   * Normalizes whitespace in a piece of rendered text.
   *
   * Zero-width spaces (U+200B) are removed first, every run of whitespace
   * (including a lone tab or line break) is collapsed into a single space,
   * and only then is the result trimmed. Trimming last matters: U+200B is
   * not whitespace for `String.prototype.trim`, so trimming first would
   * leave stray leading/trailing spaces behind once the zero-width
   * characters around them are stripped.
   *
   * @param {string} str - Raw text, typically the result of jQuery `.text()`.
   * @returns {string} The text with single spaces between words and no
   *   leading or trailing whitespace.
   */
  var squish = function(str) {
    return str
      .replace(/\u200B/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  };

  /**
   * Entry point of the service: extracts the articles and the riders for
   * the given lease and hands both back wrapped in a resolved promise.
   *
   * @param {Object} leaseJson - Lease data forwarded as-is to
   *   `parseArticles` and `parseRiders`.
   * @returns {Promise<{articles: Object, riders: Object}>} Promise resolved
   *   with the two parsed structures.
   */
  var parseUnstructuredData = function(leaseJson) {
    // Property order keeps articles being parsed before riders.
    return Promise.resolve({
      articles: parseArticles(leaseJson),
      riders: parseRiders(leaseJson),
    });
  };

  // `articles` structure:
  // {
  //   'ARTICLE 1': {
  //     'SECTION 1.1': { 'modified': false, 'category': 'Relocation', 'text': 'Text of section 1.1' },
  //     'SECTION 1.2': { 'modified': true, 'category': 'Kickout', 'text': 'Text of section 1.2' },
  //     ...
  //   },
  //   'ARTICLE 2': {
  //     'SECTION 2.1': { 'modified': false, 'category': 'Go Dark', 'text': 'Text of section 2.1' },
  //     ...
  //   },
  //   ...
  // }
  /**
   * Builds a map of article title -> section title -> section info from the
   * lease rendered inside `iframe[event-name="export"]`.
   *
   * Every `h1` under `.lease` except the first one is treated as an article.
   * The `h2`, `h3` and `div` siblings that follow it (up to the next `h1`)
   * are its section fragments; a `div` is represented by its first `h2`/`h3`
   * child. Fragments that resolve to the same section title are merged: the
   * text is concatenated, `modified` stays true once set, and a concrete
   * category is never downgraded back to 'Other' by a later fragment that has
   * no concept mapping.
   *
   * @param {Object} leaseJson - Lease data. Reads `freeTexts` (their
   *   `sectionId` values mark modified free texts) and
   *   `form.conceptsMapping.sections` (entries with `freeTextIds` and
   *   `concept`).
   * @returns {Object} Map keyed by article title, each value being a map of
   *   section title to `{ category, modified, text }`.
   */
  var parseArticles = function(leaseJson) {
    var articles = {};
    var $renderedLease = $('iframe[event-name="export"]');
    // A Set gives constant-time membership checks inside the nested loops.
    var modifiedIds = new Set(_.map(leaseJson.freeTexts, 'sectionId'));
    var conceptsMapping = leaseJson.form.conceptsMapping;
    // Tolerate a missing mapping the same way a missing `sections` list is.
    var sectionMappings = (conceptsMapping && conceptsMapping.sections) || [];

    // Returns the concept of the first free text that maps to something other
    // than 'Other'; falls back to 'Other' when none does.
    var resolveCategory = function(freeTextIds) {
      var category = 'Other';
      freeTextIds.some(function(freeTextId) {
        sectionMappings.some(function(sectionMapping) {
          if (sectionMapping.freeTextIds.includes(freeTextId)) {
            category = sectionMapping.concept;
            return true;
          }
          return false;
        });
        return category !== 'Other';
      });
      return category;
    };

    $renderedLease
      .contents()
      .find('.lease h1')
      .each(function(articleIndex, article) {
        if (articleIndex === 0) return; // skip the first article

        var $article = $(article);
        var articleTitle = squish($article.text());
        // all h2, h3, div tags between this h1 and the next one
        var $fragments = $article.nextUntil('h1').filter('h2, h3, div');
        if (!articles[articleTitle]) {
          articles[articleTitle] = {};
        }
        var sections = articles[articleTitle];

        $fragments.each(function(fragmentIndex, fragment) {
          var heading = fragment;
          var isWrapped = fragment.nodeName === 'DIV';
          if (isWrapped) {
            // a wrapper div is represented by its first h2/h3 child
            var $headings = $(fragment).children('h2, h3');
            if ($headings.length === 0) return;
            heading = $headings[0];
          }

          var $heading = $(heading);
          var sectionTitle;
          var text;

          if (heading.nodeName === 'H2') {
            sectionTitle = squish($heading.find('span[list]').text());
            text = squish($heading.find('span[free-text]').text());
          } else if (heading.nodeName === 'H3') {
            // an h3 belongs to the closest preceding h2; when the h3 sits in
            // a wrapper div, the h2 is looked up among the div's siblings
            var $anchor = isWrapped ? $heading.parent() : $heading;
            sectionTitle = squish(
              $anchor
                .prevAll('h2:first')
                .children('span[list]')
                .text(),
            );
            text = squish($heading.text());
          }
          if (!(sectionTitle && text)) return;

          // ids of the free texts inside this fragment (explicit radix so the
          // attribute is always read as a decimal number)
          var freeTextIds = $heading
            .find('span[free-text]')
            .map(function(spanIndex, freeText) {
              return parseInt($(freeText).attr('free-text'), 10);
            })
            .toArray();

          var isModified = freeTextIds.some(function(freeTextId) {
            return modifiedIds.has(freeTextId);
          });
          var category = resolveCategory(freeTextIds);

          var existing = sections[sectionTitle];
          if (!existing) {
            sections[sectionTitle] = {
              category: category,
              modified: isModified,
              text: text,
            };
            return;
          }

          existing.text += ' ' + text;
          // Keep an already resolved concept when this fragment has none.
          if (category !== 'Other') {
            existing.category = category;
          }
          if (!existing.modified) {
            existing.modified = isModified;
          }
        });
      });

    return articles;
  };

  // `riders` structure:
  // {
  //   'TI Allowance': {
  //     'title': 'CONSTRUCTION RIDER',
  //     'modified': false,
  //     'text': 'Text of rider'
  //   },
  //   ...
  //   'Guaranty Rider': [
  //      {
  //        'title': 'Guaranty Rider 1',
  //        'modified': false,
  //        'text': 'Text of rider'
  //      },
  //      {
  //        'title': 'Guaranty Rider 2',
  //        'modified': false,
  //        'text': 'Text of rider'
  //      }
  //    ]
  // }
  /**
   * Builds a map of rider concept -> rider info from the rendered riders.
   *
   * For every entry of `form.conceptsMapping.riders`, the paragraphs (`p`)
   * that directly contain the entry's free-text spans are collected. A
   * centered paragraph provides the rider title (first one wins); any other
   * paragraph is appended to the rider text. `modified` becomes true as soon
   * as one of the rider's rendered free texts is listed in
   * `leaseJson.freeTexts`.
   *
   * The 'Guaranty Rider' concept is special-cased: one rider object is
   * produced per guarantor `ng-repeat` container and the value stored under
   * that key is an array.
   *
   * NOTE(review): elements are looked up in the top-level document here,
   * not inside the export iframe — confirm this is where riders are rendered.
   *
   * @param {Object} leaseJson - Lease data. Reads `freeTexts` (their
   *   `sectionId` values mark modified free texts) and
   *   `form.conceptsMapping.riders` (entries with `freeTextIds` and
   *   `concept`).
   * @returns {Object} Map of concept to `{ title, modified, text }`, or to an
   *   array of such objects for 'Guaranty Rider'.
   */
  var parseRiders = function(leaseJson) {
    var riders = {};
    var modifiedIds = new Set(_.map(leaseJson.freeTexts, 'sectionId'));
    var conceptsMapping = leaseJson.form.conceptsMapping;
    // Tolerate a missing mapping the same way a missing `riders` list is.
    var riderMappings = (conceptsMapping && conceptsMapping.riders) || [];

    // TODO: remove this very dirty hack to parse multiple Guaranty Riders
    var guarantorSelector = [
      "div[ng-repeat='guarantor in lease.guarantorInfo.entities track by $index']",
      "div[ng-repeat='guarantor in lease.guarantorInfo.individuals track by $index']",
      "div[ng-repeat='guarantor in lease.guarantorInfo.marriedCouples track by $index']",
    ].join(', ');

    // Finds the <p> parents of the spans rendered for `freeTextId`, searching
    // inside `$root` when given and in the whole document otherwise.
    var findParagraphs = function($root, freeTextId) {
      var selector = "span[free-text^='" + freeTextId + "']";
      var $spans = $root ? $root.find(selector) : $(selector);
      var wantedId = parseInt(freeTextId, 10);

      // The prefix selector alone also matches longer ids (12 matches "120"),
      // so keep only spans whose leading integer is exactly the wanted id.
      // Non-numeric ids cannot be compared that way and keep the plain
      // prefix match.
      if (!isNaN(wantedId)) {
        $spans = $spans.filter(function(spanIndex, span) {
          return parseInt($(span).attr('free-text'), 10) === wantedId;
        });
      }
      return $spans.parent('p');
    };

    // Folds one matched paragraph set into `rider`: title, text and the
    // sticky `modified` flag.
    var mergeParagraph = function(rider, $paragraph, freeTextId) {
      if ($paragraph.css('textAlign') === 'center') {
        if (!rider.title) {
          rider.title = squish($paragraph.text());
        }
      } else {
        rider.text = squish(rider.text + ' ' + $paragraph.text());
      }

      if (!rider.modified) {
        rider.modified = modifiedIds.has(freeTextId);
      }
    };

    riderMappings.forEach(function(riderMapping) {
      var concept = riderMapping.concept;

      if (concept === 'Guaranty Rider') {
        riders[concept] = [];
        $(guarantorSelector).each(function(guarantorIndex, guarantor) {
          var $guarantor = $(guarantor);
          var currentRider = { modified: false, text: '' };

          riderMapping.freeTextIds.forEach(function(freeTextId) {
            var $paragraph = findParagraphs($guarantor, freeTextId);
            if ($paragraph.length > 0) {
              mergeParagraph(currentRider, $paragraph, freeTextId);
            }
          });
          riders[concept].push(currentRider);
        });
        return;
      }

      riderMapping.freeTextIds.forEach(function(freeTextId) {
        var $paragraph = findParagraphs(null, freeTextId);
        if ($paragraph.length === 0) return;

        // The rider entry is created lazily, only once something rendered.
        if (!riders[concept]) {
          riders[concept] = { modified: false, text: '' };
        }
        mergeParagraph(riders[concept], $paragraph, freeTextId);
      });
    });

    return riders;
  };

  // Object returned from the service function: parseUnstructuredData is the
  // only member exposed; the other functions stay local to this closure.
  return {
    parseUnstructuredData: parseUnstructuredData,
  };
});
