# Anderson-Darling test

export OneSampleADTest, KSampleADTest

abstract type ADTest <: HypothesisTest end

## ONE SAMPLE AD-TEST
### http://www.itl.nist.gov/div898/handbook/eda/section3/eda35e.htm

function adstats(x::AbstractVector{T}, d::UnivariateDistribution) where T<:Real
    n = length(x)
    A² = convert(Float64, -n)
    for i = 1:n
        lcdfz = logcdf(d, x[i])
        lccdfz = logccdf(d, x[n - i + 1])
        A² -= (i+i - 1)/n * (lcdfz + lccdfz)
    end
    return A²
end

struct OneSampleADTest <: ADTest
    n::Int      # number of observations
    μ::Float64  # sample mean
    σ::Float64  # sample std
    A²::Float64 # Anderson-Darling test statistic
end

"""
    OneSampleADTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)

Perform a one-sample Anderson–Darling test of the null hypothesis that the data in vector
`x` come from the distribution `d` against the alternative hypothesis that the sample
is not drawn from `d`.

Implements: [`pvalue`](@ref)
"""
function OneSampleADTest(x::AbstractVector{T}, d::UnivariateDistribution) where T<:Real
    n = length(x)
    μ, σ = mean_and_std(x)
    y = sort(x)
    OneSampleADTest(n, μ, σ, adstats(y, d))
end

testname(::OneSampleADTest) = "One sample Anderson-Darling test"
default_tail(test::OneSampleADTest) = :right

function show_params(io::IO, x::OneSampleADTest, ident = "")
    println(io, ident, "number of observations:   $(x.n)")
    println(io, ident, "sample mean:              $(x.μ)")
    println(io, ident, "sample SD:                $(x.σ)")
    println(io, ident, "A² statistic:             $(x.A²)")
end

### G. and J. Marsaglia, "Evaluating the Anderson-Darling Distribution", Journal of Statistical Software, 2004
function StatsAPI.pvalue(t::OneSampleADTest)
    g1(x) = sqrt(x)*(1.0-x)*(49.0x-102.0)
    g2(x) = -0.00022633 + (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864x)x)x)x)x
    g3(x) = -130.2137 + (745.2337 - (1705.091 - (1950.646 - (1116.360 - 255.7844x)x)x)x)x

    n = t.n
    z = t.A²
    y = if z < 2.0
        exp(-1.2337141/z)*(2.00012+(0.247105-(.0649821-(.0347962-(.0116720-.00168691*z)*z)*z)*z)*z)/sqrt(z)
    else
        exp(-exp(1.0776-(2.30695-(.43424-(.082433-(.008056-.0003146*z)*z)*z)*z)*z))
    end

    pv = y
    if y > 0.8
        pv += g3(y)/n
    else
        c = 0.01265 + 0.1757/n
        if y < c
            pv += (0.0037/n^3 + 0.00078/n^2 + 0.00006/n)*g1(y/c)
        else
            pv += (0.04213/n + 0.01365/n^2)*g2((y - c)/(0.8 - c))
        end
    end
    return 1.0 - pv
end

## K-SAMPLE ANDERSON DARLING TEST
struct KSampleADTest{T<:Real} <: ADTest
    k::Int             # number of samples
    n::Int             # number of observations
    σ::Float64         # variance A²k
    A²k::Float64       # Anderson-Darling test statistic
    modified::Bool     # whether the modified statistic is used
    nsim::Int          # number of simulations for P-value calculation (0 - for asymptotic calculation)
    samples::Vector{T} # pooled samples
    sizes::Vector{Int} # sizes of samples
end

"""
    KSampleADTest(xs::AbstractVector{<:Real}...; modified = true, nsim = 0)

Perform a ``k``-sample Anderson–Darling test of the null hypothesis that the data in the
``k`` vectors `xs` come from the same distribution against the alternative hypothesis that
the samples come from different distributions.

`modified` parameter enables a modified test calculation for samples whose observations
do not all coincide.

If `nsim` is equal to 0 (the default) the asymptotic calculation of p-value is used.
If it is greater than 0, an estimation of p-values is used by generating `nsim` random splits
of the pooled data on ``k`` samples, evaluating the AD statistics for each split, and
computing the proportion of simulated values which are greater or equal to observed.
This proportion is reported as p-value estimate.

Implements: [`pvalue`](@ref)

# References

  * [Scholz and Stephens (1987). K-Sample Anderson-Darling Tests](@cite scholz1987)
"""
KSampleADTest(xs::AbstractVector{T}...; modified = true, nsim = 0) where T<:Real =
    a2_ksample(xs, modified, nsim)

testname(::KSampleADTest) = "k-sample Anderson-Darling test"
default_tail(test::KSampleADTest) = :right

function show_params(io::IO, x::KSampleADTest, ident = "")
    println(io, ident, "number of samples:        $(x.k)")
    println(io, ident, "number of observations:   $(x.n)")
    println(io, ident, "SD of A²k:                $(x.σ)")
    println(io, ident, "A²k statistic:            $(x.A²k)")
    println(io, ident, "standardized statistic:   $((x.A²k - x.k + 1) / x.σ)")
    println(io, ident, "modified test:            $(x.modified)")
    println(io, ident, "p-value calculation:      $(x.nsim == 0 ? "asymptotic" : "simulation" )")
    x.nsim != 0 && println(io, ident, "number of simulations:    $(x.nsim)")
end

"""Monte-Carlo simulation of the p-value for AD test"""
function pvaluesim(x::KSampleADTest)
    Z = sort(x.samples)
    Z⁺ = unique(Z)

    cn = cumsum(x.sizes)
    insert!(cn, 1, 0.0)
    Xr = [cn[i]+1:cn[i+1] for i in 1:x.k]
    idxs = collect(1:x.n)
    IV = [view(idxs, Xr[i]) for i in 1:x.k]
    Xr = [view(x.samples, IV[i]) for i in 1:x.k]

    pv = 0
    for j in 1:x.nsim
        shuffle!(idxs)
        A²k, A²km = adkvals(Z⁺, x.n, Xr)
        adv = x.modified ? A²km : A²k
        adv >= x.A²k && (pv += 1)
    end
    return pv/x.nsim
end

const M   = [1,2,3,4,6,8,10,20]
const PV  = [.00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,.925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999]
const TKM = [-1.1923472556232377 -1.1779755032374564 -1.1689824368072272 -1.1424999536072922 -1.1274927776533619 -1.079431992286445 -1.0504873030014337 -0.9989259798052196 -0.9425379414511752 -0.8984863629243318 -0.8603859837928499 -0.7262450719682598 -0.595805134143589 -0.4564972961242285 -0.2961017594430343 -0.10042062839743962 0.15779986189933617 0.5377525370208343 1.2270085983886296 1.5270251901768497 1.9597314709824634 2.7268894645107142 3.7805477044406355 4.121285194909089 4.59629487851025 5.4144038948723905 6.476499981200343 6.818209982829944 7.288086696921435 8.091812033522697 9.184790239278916 9.59709802719259 10.088416692202337 10.914846562007554 12.040789367454868;
             -1.5849548989018964 -1.5407036306002324 -1.5217650056531025 -1.468313040457585 -1.4383460114029611 -1.3510451980424227 -1.2994715185279744 -1.2107011085917234 -1.1182349910369536 -1.0480894352574122 -0.9888467602015968 -0.792282309179151 -0.6158884225411821 -0.4357973228266815 -0.24055211468641896 -0.013263022467496355 0.2679318847085987 0.6545019598726984 1.3035350042516591 1.5717238802812676 1.9472543460769594 2.5917105532062927 3.4417697301287804 3.708500478251771 4.08514071675675 4.729002778898586 5.553060449998679 5.8246988123153525 6.2044102945806 6.8092648417824355 7.564819358105057 7.8751443348359444 8.265612297736865 9.09572889834988 9.634634570153578;
             -1.8179842473942176 -1.7701061384541197 -1.7430115382228353 -1.6664153177996697 -1.6277529354269717 -1.509882048484231 -1.4423296156847323 -1.327021532216986 -1.2105162624023968 -1.1240541192874598 -1.0516241510661641 -0.8188455856721732 -0.6168586530804309 -0.4186963520248099 -0.20789666676568955 0.0295008713522725 0.31671631783726234 0.6992815580015851 1.3223749183602587 1.5732385052700641 1.9236845463814185 2.5085099219292766 3.2689645805547753 3.5044925007442225 3.841724525500276 4.399781356888528 5.114888256216994 5.346077131581993 5.659256532458322 6.257314173278837 6.986207519686372 7.219730636115486 7.592515097944003 8.120907844495369 8.723051996739617;
             -1.9989354254437397 -1.9372512947973601 -1.9085397775154465 -1.8141114782122008 -1.7646890249660294 -1.6175716157784974 -1.5372145799225008 -1.4011822939743703 -1.2672404489587346 -1.169926310440552 -1.0887002685066605 -0.8322081015222909 -0.6140517112328044 -0.404166231020204 -0.18335516739183028 0.06015392460374823 0.34855873939459553 0.7267832591857595 1.3282605791296271 1.5672982936172346 1.8987053911907181 2.4483775155779135 3.157299591873491 3.3747262669186058 3.6797595381737613 4.192286238183518 4.869623799822088 5.080242748754673 5.379051520938655 5.8747370994257535 6.5004003034881785 6.697073027583033 7.03600074408326 7.445733876827231 8.020320100696683;
             -2.260406253565197 -2.1753074859488235 -2.128991194356946 -2.0065848989178288 -1.945510672007325 -1.7624757482456306 -1.6635124607248777 -1.4990052836398609 -1.3392374211352471 -1.2252207360751253 -1.1319971726969635 -0.8445584556230463 -0.6076273028769662 -0.3834824380209529 -0.15483904255918668 0.09368829578808609 0.3831767280474241 0.7547390312514602 1.3312274974190232 1.5571379748618364 1.865766664385659 2.3730178158029442 3.0080178359348353 3.1978501989364085 3.472363926096708 3.931316296612841 4.536760955793221 4.724863565260161 4.962319348947046 5.408613188229584 6.007714186756578 6.198538388520974 6.457796867859084 6.849495066215343 7.4640951217005576;
             -2.440876951075476 -2.341332708197796 -2.2804325041192546 -2.1402371332779366 -2.06667689211804 -1.8548276337573943 -1.7393591986264094 -1.5563125073892916 -1.3811208567928928 -1.257037173358548 -1.1566367065963437 -0.8485596103567375 -0.5998647718737802 -0.36811584852000284 -0.13474953508678567 0.11602561466210987 0.40573641653024317 0.7727862506626891 1.3335441037600442 1.5496136443763067 1.8418545847979217 2.317705414653881 2.917031508317842 3.0961555736231836 3.350246199357208 3.7727674902587927 4.339241095752881 4.516143795536152 4.753763012043537 5.116810577647693 5.672738471706687 5.83139110084699 6.050071129419877 6.401072284239558 6.742573268315121;
             -2.551081192060893 -2.4547632386210463 -2.3949899123424965 -2.2354277972087395 -2.1584171777926153 -1.9275339742517925 -1.800538561713763 -1.5999251384582587 -1.4106960313169241 -1.2786101238180516 -1.171674441939073 -0.8505458546842916 -0.5938925384383626 -0.35689852586669696 -0.11989003705869453 0.13208631237916915 0.42063871492979277 0.7821012141619668 1.3302949325805677 1.5403968280367855 1.8241731782273896 2.2856162625735688 2.8570374013416835 3.0361153244189336 3.274335630236937 3.6852545534103474 4.195577479309726 4.35310621764469 4.569614918885558 4.92866257900564 5.385051459236165 5.55218650057524 5.765620478139155 6.140990692861778 6.68156168217425;
             -2.943481865323865 -2.793682011666347 -2.7120308613215105 -2.506772718605168 -2.39851440603294 -2.1014812457466228 -1.9452101045743528 -1.7041554289774754 -1.4829780613698689 -1.331579442365025 -1.2111003274077823 -0.8544988934261977 -0.5790017475022499 -0.3302088456189407 -0.08744838035713975 0.1673635508237027 0.45239685235540256 0.8039352789696216 1.3240150219349214 1.5194206287846455 1.782378056603685 2.1987886405752803 2.709552813734848 2.8617281816117868 3.0781111816058195 3.431554501112554 3.870563905935619 4.00537363494248 4.1845928150957805 4.503766651874711 4.929971363503398 5.076322471440581 5.27497591999295 5.52791651167518 5.848654461679204]
const TK  = [-1.1926243990804033 -1.1743913869790086 -1.165614653421677 -1.1392512344379109 -1.1246309930698315 -1.0778245813819973 -1.0492276925221506 -0.997028564406416 -0.9412882492496206 -0.89760798562343 -0.8592489056677113 -0.7249815774273046 -0.5958882137218224 -0.4570311457320469 -0.2966615591256979 -0.10064577139947004 0.1579123553929535 0.5374654181056042 1.226650205162352 1.525729224090128 1.9604816602713853 2.7300669502101567 3.7819466008446807 4.115630048318622 4.587277241771445 5.410120147245256 6.504734959614865 6.843435861344368 7.312351411407612 8.204052812351167 9.380908370226425 9.801397342146213 10.210787470021238 11.065700560614296 12.19099994622706;
             -1.5715146874157493 -1.5403073115879633 -1.5169776509733697 -1.4665221836828817 -1.4369102095850903 -1.3496290747813668 -1.298278486773026 -1.2090663275299973 -1.1171538249300548 -1.0478551970728252 -0.9887300447097083 -0.7930787562580826 -0.6164000565514812 -0.4373090523882904 -0.24235866195256756 -0.015768955906757698 0.266294430295106 0.651647823017675 1.3029824265648888 1.57130609192188 1.9479671937433227 2.5878519082111944 3.4484532844903657 3.7110094714746533 4.0901675267103235 4.742903325658023 5.610841153635776 5.869347075916751 6.233093442455383 6.861159495163239 7.605983240042428 7.922087425828266 8.226816741867589 8.961571218345487 9.728780112646144;
             -1.8183619044915538 -1.771831627984931 -1.7438028632206626 -1.6685917204535754 -1.6302858014857406 -1.5093422844644864 -1.4403419931324906 -1.3247658289009951 -1.2083281555973817 -1.1229821929800383 -1.0505714158465091 -0.8173179899230579 -0.6158754729097995 -0.4173931635603583 -0.20656854219311493 0.03167040057514411 0.3179328129178279 0.6988924030220678 1.322675757704026 1.571254330141332 1.9208235093185737 2.5057336598815785 3.2620767489026155 3.494827429906371 3.821580239468593 4.382580788046574 5.103728314536232 5.330640017501667 5.654153205869778 6.197044525573566 6.931220396914263 7.148576870375071 7.383472025041834 7.97419921438612 8.823816150239953;
             -1.993303035023817 -1.9303016465983012 -1.9011878493296313 -1.809597739589436 -1.7604555789343959 -1.6186992259552142 -1.5378205899525677 -1.4012654453161015 -1.2667859059777642 -1.1695165793416582 -1.088073813777657 -0.8302604711079156 -0.6128988275759992 -0.40341742777142625 -0.18391138560002934 0.05895056579716881 0.34774871252504863 0.725472516184709 1.3257863522605307 1.5639734688636224 1.8941870167732415 2.4378536760134804 3.1376765263251087 3.3597689516182268 3.6615766661639153 4.184503386991316 4.855271567764788 5.072227985025396 5.36726178896159 5.8624270357507 6.553832805641278 6.757858279731317 7.027061678867403 7.507509459299485 7.938773729580974;
             -2.2545517114287588 -2.1697075864885766 -2.12187201988376 -2.003627986840613 -1.9429252192242403 -1.7605849774379487 -1.660685147362639 -1.49762799934165 -1.3385380674472869 -1.2255478542919374 -1.132652337293431 -0.8441203984143166 -0.6065848532259017 -0.38245676699262365 -0.1541978126415042 0.09480190481288699 0.38367083538080354 0.7546803845107093 1.3294592722342449 1.5534810345825008 1.860970482439231 2.363961558156508 3.005369250953606 3.206591077178068 3.4759320563848055 3.936940884025735 4.516582521230445 4.709896224583401 4.965706936814709 5.380382889324348 5.902619104241206 6.033874108179298 6.3350817756964535 6.803415323229341 7.179770904792615;
             -2.4559420446701052 -2.34920084792834 -2.296976185073357 -2.1420394469796284 -2.065104153183162 -1.8532671437387496 -1.7404482818386349 -1.5569468645307465 -1.3799823219728318 -1.2560175975037071 -1.1548651680857345 -0.8475332427807808 -0.5986528521480281 -0.3671741298455163 -0.13421441960815286 0.11717930226165238 0.4055787330043358 0.7709422173846994 1.3331585127712215 1.5514648960874131 1.8454640848313635 2.3221910595050956 2.927199195920216 3.112307311387735 3.3678052821501323 3.8019172475687917 4.3616672339609766 4.535393895749832 4.762542611054845 5.174277473836703 5.730501341808227 5.919474059847239 6.235336726812749 6.723206431226167 7.231651147242072;
             -2.569928572144317 -2.4656196647138655 -2.4014758345752387 -2.236262068336355 -2.158516518380391 -1.923673308482446 -1.7979065748604046 -1.5997846679626186 -1.4103321257056918 -1.2784135432631765 -1.1717696031635132 -0.8499132658320339 -0.5943484194811337 -0.35694312707262416 -0.1214887880639173 0.13112100670750404 0.4191870332071281 0.7814077014565005 1.3316584559236748 1.5432210796311716 1.8266084594238228 2.2920622811804403 2.856304977260614 3.0300396084181944 3.2684205939333277 3.664624203263345 4.176983049634148 4.326393560377753 4.552857168246306 4.92111391208768 5.502216961106142 5.636662090993625 5.850215772428456 6.1557325597112955 6.609738032513054;
             -2.972701482938346 -2.812945948534419 -2.7269368807141112 -2.505073465280368 -2.3962139122991437 -2.099707039134466 -1.9451305240930674 -1.7042574729580975 -1.4825402524085212 -1.330796855221357 -1.2110926561356254 -0.8546019664189122 -0.5788952082877374 -0.33039963362646674 -0.08728908009829067 0.16763773343275448 0.4519318546878791 0.8027235952910927 1.3216636485933564 1.5182514190293643 1.7792399561819003 2.196289357383261 2.704436161637905 2.8602211164423332 3.071314876385227 3.4161591494006935 3.856086976310044 3.999063549670627 4.180960369206441 4.484057207941962 4.871734136391144 5.018239006694096 5.187665970098557 5.479057559707716 5.841992903950639]

"""Asymptotic evaluation of the p-value for Anderson-Darling test"""
function pvalueasym(x::KSampleADTest)
    # Computational Details:
    #
    # This function uses the upper Tₘ quantiles as obtained via simulation of
    # the Anderson-Darling test statistics (2*10^6) with sample from standard
    # Normal distribution with size n=500 for each sample.
    #
    # After standardization of AD statistics, p-quantiles were estimated
    # for p ∈ [.00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,
    # .1,.2,.3,.4,.5,.6,.7,.8,.9,.925,.95,.975,.99,.9925,.995,.9975,.999,
    # .99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999]
    #
    # Particular p-quantiles are determined from the simulations for various
    # values of `m`, m ∈ [1,2,3,4,6,8,10,20]. By interpolation, for the given
    # value of `m` from the performed test is done by fitting a cubic polynomial
    # of 1/sqrt(ms) to the simulated Tₘ quantiles.
    #
    # Next, the quadratic polynomial is used to fit the log((1-p)/p) to above
    # interpolated quantiles and the value fitted to the estimated standardized
    # AD statistics Tₘ. If p-value outside of [.00001, .99999] range then
    # linear extrapolation is used.
    #
    # The p-values from Table 1 of the original paper were reproduced with
    # relative error bounded bounded by 1% in 85% of cases (see the relative
    # error table below).
    #
    # m\α   .25      .10     .05     .025     .01
    #  1   0.0001   0.0018  0.0006  0.0112   0.0305
    #  2  -0.0089  -0.0064  0.0024  0.0121   0.0306
    #  3  -0.0068  -0.0026  0.0045  0.0082   0.0164
    #  4  -0.0042  -0.0009  0.0027  0.005    0.0079
    #  6  -0.0025  -0.0005  0.005   0.003   -0.0004
    #  8  -0.0024   0.0011  0.0027  0.0023  -0.0023
    #  10 -0.0038   0.0008  0.0023  0.0026  -0.0043
    #  ∞  -0.0038   0.0014  0.005   0.0037   0.0148
    #

    m = x.k - 1
    Tk = (x.A²k - m) / x.σ
    sqm = 1.0 ./ sqrt.(M)

    T = x.modified ? TKM : TK

    # Construct Tm curve
    n = length(PV)
    Tm = zeros(n)
    for i in 1:n
        A = [ sqm[i]^p for i = 1:length(sqm), p = 0:3 ] # fit in cubic
        C = A \ T[:,i]
        x = 1/sqrt(m)
        Tm[i] = C[1] + C[2]*x + C[3]*x^2 + C[4]*x^3
    end
    logP =  log.((1 .- PV) ./ PV )

    # perform linear extrapolation with p-value outside of [.00001, .99999]
    fitlin = (Tk < Tm[1] || Tk > Tm[end])

    # locate curve area for extrapolation
    _, j = findmin([abs(tm - Tk) for tm in Tm[2:end-1]])
    A = [ Tm[i]^p for i = j:j+2, p = 0:(fitlin ? 1 : 2) ] # fit p-values
    C = A \ logP[j:j+2]
    lp0 = C[1] + C[2]*Tk + (fitlin ? 0.0 : C[3]*Tk^2)
    return exp(lp0)/(1 + exp(lp0))
end

StatsAPI.pvalue(x::KSampleADTest) = x.nsim == 0 ? pvalueasym(x) : pvaluesim(x)

function adkvals(Z⁺, N, samples)
    k = length(samples)
    n = map(length, samples)
    L = length(Z⁺)

    fij = zeros(Int, k, L)
    for i in 1:k
        for s in samples[i]
            fij[i, searchsortedfirst(Z⁺, s)] += 1
        end
    end
    ljs = sum(fij, dims=1)

    A²k = A²km = 0.
    for i in 1:k
        innerm = 0.
        inner = 0.
        Mij = 0.
        Bj = 0.
        for j = 1:L
            lj = ljs[j]
            Mij += fij[i, j]
            Bj += lj
            Maij = Mij - fij[i, j]/2.
            Baj = Bj - lj/2.
            innerm += lj/N * (N*Maij-n[i]*Baj)^2 / (Baj*(N-Baj) - N*lj/4.)
            if j < L
                inner += lj/N * (N*Mij-n[i]*Bj)^2 / (Bj*(N-Bj))
            end
        end
        A²km += innerm / n[i]
        A²k += inner / n[i]
    end
    return A²k, A²km * (N - 1.) / N
end

function a2_ksample(samples, modified, method)
    k = length(samples)
    k < 2 && error("Need at least two samples")

    n = map(length, samples)
    pooled = vcat(samples...)
    Z = sort(pooled)
    N = length(Z)
    Z⁺ = unique(Z)
    L = length(Z⁺)

    L < 2 && error("Need more then 1 observation")
    minimum(n) == 0 && error("One of the samples is empty")

    A²k, A²km = adkvals(Z⁺, N, samples)

    H = sum(map(i->1 ./ i, n))
    h = sum(1 ./ (1:N-1))
    g = 0.
    for i in 1:N-2
        for j in i+1:N-1
            g += 1. / ((N - i) * j)
        end
    end

    a = (4*g - 6)*(k - 1) + (10 - 6*g)*H
    b = (2*g - 4)*k^2 + 8*h*k + (2*g - 14*h - 4)*H - 8*h + 4*g - 6
    c = (6*h + 2*g - 2)*k^2 + (4*h - 4*g + 6)*k + (2*h - 6)*H + 4*h
    d = (2*h + 6)*k^2 - 4*h*k
    σ² = (a*N^3 + b*N^2 + c*N + d) / ((N - 1.) * (N - 2.) * (N - 3.))

    KSampleADTest(k, N, sqrt(σ²), (modified ? A²km : A²k), modified, method, pooled, [n...])
end
