--------------------------------------------------------------------------------
--- Utilities for random generation
-- @module lua-nucleo.random
-- This file is a part of lua-nucleo library
-- @copyright lua-nucleo authors (see file `COPYRIGHT` for the license)
--------------------------------------------------------------------------------

local assert_is_number,
      assert_is_table
      = import 'lua-nucleo/typeassert.lua'
      {
        'assert_is_number',
        'assert_is_table'
      }

local ensure,
      ensure_equals
      = import 'lua-nucleo/ensure.lua'
      {
        'ensure',
        'ensure_equals'
      }

local taccumulate,
      tnormalize,
      tcount_elements
      = import 'lua-nucleo/table.lua'
      {
        'taccumulate',
        'tnormalize',
        'tcount_elements'
      }

local arguments,
      optional_arguments
      = import 'lua-nucleo/args.lua'
      {
        'arguments',
        'optional_arguments'
      }

local type_imports = import 'lua-nucleo/type.lua' ()

local math_abs = math.abs
local pairs = pairs

-- main algorithm value, got 99% chance of false negative
-- (though high chance of false positive accordingly)
-- http://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm
local CHI_CRITICAL_LIST =
{
  006.635; 009.210; 011.345; 013.277; 015.086; 016.812; 018.475; 020.090;
  021.666; 023.209; 024.725; 026.217; 027.688; 029.141; 030.578; 032.000;
  033.409; 034.805; 036.191; 037.566; 038.932; 040.289; 041.638; 042.980;
  044.314; 045.642; 046.963; 048.278; 049.588; 050.892; 052.191; 053.486;
  054.776; 056.061; 057.342; 058.619; 059.893; 061.162; 062.428; 063.691;
  064.950; 066.206; 067.459; 068.710; 069.957; 071.201; 072.443; 073.683;
  074.919; 076.154; 077.386; 078.616; 079.843; 081.069; 082.292; 083.513;
  084.733; 085.950; 087.166; 088.379; 089.591; 090.802; 092.010; 093.217;
  094.422; 095.626; 096.828; 098.028; 099.228; 100.425; 101.621; 102.816;
  104.010; 105.202; 106.393; 107.583; 108.771; 109.958; 111.144; 112.329;
  113.512; 114.695; 115.876; 117.057; 118.236; 119.414; 120.591; 121.767;
  122.942; 124.116; 125.289; 126.462; 127.633; 128.803; 129.973; 131.141;
  132.309; 133.476; 134.642;
}

-- Function roughly determines if distribution of values in table experiments
-- corresponds distribution in weights. Max number of elements check - 100.
-- algorithm based on Hi square Pearson's test.
-- http://en.wikipedia.org/wiki/Pearson%27s_chi-square_test
local validate_probability_rough = function(weights, experiments)
  -- input checks
  arguments(
    "table", weights,
    "table", experiments
  )
  local length = 0
  for k, v in pairs(weights) do
    assert_is_number(weights[k])
    assert_is_number(experiments[k])
    length = length + 1
  end
  if length > 100 or length < 2 then
    return nil, "argument: wrong weight table size"
  end
  if tcount_elements(experiments) ~= length then
    return nil, "argument: wrong experiments table size"
  end
  local experiments_num = taccumulate(experiments)
  if experiments_num < 1000 then
    return nil, "argument: lack of experiments data"
  end

  -- data preparation
  local weights_normalized = tnormalize(weights)
  local experiments_normalized = tnormalize(experiments)
  local chi_square = 0

  -- algorithm itself
  for k, v in pairs(weights_normalized) do
    local delta = math_abs(weights_normalized[k] - experiments_normalized[k])
    chi_square = chi_square + (100 * delta * delta) / weights_normalized[k]
  end

  return chi_square < CHI_CRITICAL_LIST[length - 1]
end

-- Function precisely determines if distribution of values in table experiments
-- corresponds distribution in weights.
-- algorithm based on experiment probability check.
-- algorithm:
-- We check the alteration of chi square after ten times experiments growth.
-- If this change tends to 1 (see STEP_STAGNATION) we increase probability
-- that generated experiments doesn't match weights (-1 to decision variable),
-- if it tends to 10 (see STEP_IMPROVEMENT) we increase probability that
-- generated experiments match weights (+1 to decision variable).
-- When decision variable reaches DECISION_VALUE limit - return whatever have.
-- If we cant decide in step_limit iterations we return that we increase basic
-- number of experiments, until reach INCREASE_LIMIT. Then we return nil, err.
-- (that usually can mean that weights contain too low-probable values or we
-- need further investigation).
local validate_probability_precise = function(weights, generate, ...)
  -- input checks
  arguments(
    "table", weights,
    "function", generate
  )
  for k, v in pairs(weights) do
    assert_is_number(weights[k])
  end
  local weights_normalized = tnormalize(weights)
  for k, v in pairs(weights_normalized) do
    if
      weights_normalized[k] < 1e-5
    then
      return nil, "Input weights below level of sensitivity"
    end
  end
  local test_table = generate(50, ...)
  assert_is_table(test_table)
  for k, v in pairs(test_table) do
    assert_is_number(v)
  end
  if tcount_elements(test_table) ~= tcount_elements(weights) then
    return nil, "argument: wrong generated table size"
  end
  local experiments_num = taccumulate(test_table)
  if experiments_num ~= 50 then
    return nil, "argument: wrong generated table data"
  end

  -- various algorithm variables initialization
  local DECISION_VALUE = 8
  local BASE_SENSITIVITY = 3 -- 3-5 reasonable values
  local SENSITIVITY_DELTA = 2
  local INCREASE_LIMIT = 7 - SENSITIVITY_DELTA - BASE_SENSITIVITY
  local sensitivity = BASE_SENSITIVITY -- power of number of experiments used
  local chi_squares = {} -- chi square container
  local iteration = 0 -- iteration counter
  local decision = 0 -- decision making value

  -- algorithm itself
  while true do
    -- check if we can return
    -- experience has shown that 8 value works ok
    -- more means less chances to fail, but more time to work
    if decision >= DECISION_VALUE then return true
    elseif decision <= -DECISION_VALUE then return false end

    -- exponential experiments cycle: 10^n
    for n = 0, SENSITIVITY_DELTA do
      -- data preparation
      local experiments_num = 10 ^ (sensitivity + n)
      local experiments = generate(experiments_num, ...)
      local experiments_normalized = tnormalize(experiments)

      -- calculate chi_square for current experiments num
      chi_squares[n] = 0;
      for k, v in pairs(weights_normalized) do
        local delta = math_abs(v - experiments_normalized[k])
        chi_squares[n] = chi_squares[n] + (100 * delta * delta) / v
      end
    end

    local overal_change = chi_squares[0] / chi_squares[SENSITIVITY_DELTA]
    local first_change = chi_squares[0] / chi_squares[1]
    local second_change = chi_squares[1] / chi_squares[SENSITIVITY_DELTA]

    -- TEMP! neat debug output, delete if found and do not know what it for
    --> print(decision, first_change, second_change, overal_change)

    -- check signs of definite chi_square dynamics
    -- all constants are test-based
    local OVERALL_IMPROVEMENT = 90
    local STEP_IMPROVEMENT = 9
    local STEP_IMPROVEMENT_TOP = 50
    local STEP_STAGNATION_LOW = 0.5
    local STEP_STAGNATION_TOP = 2
    local STEP_STAGNATION = 1.2
    local OVERALL_STAGNATION_LOW = 0.25
    local OVERALL_STAGNATION_TOP = 4

    if overal_change > OVERALL_IMPROVEMENT then
      decision = decision + 1
    end
    if
      first_change > STEP_IMPROVEMENT and
      second_change > 1 and
      first_change < STEP_IMPROVEMENT_TOP
    then
      decision = decision + 1
    end
    if
      second_change > STEP_IMPROVEMENT and
      first_change > 1 and
      second_change < STEP_IMPROVEMENT_TOP
    then
      decision = decision + 1
    end
    if
      overal_change > OVERALL_STAGNATION_LOW and
      overal_change < OVERALL_STAGNATION_TOP
    then
      decision = decision - 1
    end
    if
      second_change > STEP_STAGNATION_LOW and
      second_change < STEP_STAGNATION_TOP
    then
      decision = decision - 1
    end
    if
      first_change > STEP_STAGNATION_LOW and
      first_change < STEP_STAGNATION_TOP
    then
      decision = decision - 1
    end

    iteration = iteration + 1

    -- we are too deep, and accuracy is too high, experiments matches weights
    if
      sensitivity == BASE_SENSITIVITY + INCREASE_LIMIT - 1
      and chi_squares[SENSITIVITY_DELTA] < 1e-5
    then
      return true
    end

    if iteration > DECISION_VALUE * (0.5 + sensitivity - BASE_SENSITIVITY) then
      -- TEMP! debug output, delete if found and do not know what it for
      --> print("Below level of sensitivity " .. sensitivity)
      if
        sensitivity >= BASE_SENSITIVITY + INCREASE_LIMIT
      then
        return nil, "Below level of sensitivity"
      end
      sensitivity = sensitivity + 1
    end
  end
end

return
{
  validate_probability_rough = validate_probability_rough;
  validate_probability_precise = validate_probability_precise;
}