if not modules then modules = { } end modules ['sort-ini'] = {
    version   = 1.001,
    comment   = "companion to sort-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- It took a while to get there, but with Fleetwood Mac's "Don't Stop" playing in
-- the background we sort of got it done.
--
-- The code here evolved from the rather old mkii approach. There we concatinate the
-- key and (raw) entry into a new string. Numbers and special characters get some
-- treatment so that they sort ok. In addition some normalization (lowercasing,
-- accent stripping) takes place and again data is appended ror prepended.
-- Eventually these strings are sorted using a regular string sorter. The relative
-- order of character is dealt with by weighting them. It took a while to figure
-- this all out but eventually it worked ok for most languages, given that the right
-- datatables were provided.
--
-- Here we do follow a similar approach but this time we don't append the
-- manipulated keys and entries but create tables for each of them with entries
-- being tables themselves having different properties. In these tables characters
-- are represented by numbers and sorting takes place using these numbers. Strings
-- are simplified using lowercasing as well as shape codes. Numbers are filtered and
-- after getting an offset they end up at the right end of the spectrum (more clever
-- parser will be added some day). There are definitely more solutions to the
-- problem and it is a nice puzzle to solve.
--
-- In the future more methods can be added, as there is practically no limit to what
-- goes into the tables. For that we will provide hooks.
--
-- Todo: decomposition with specific order of accents, this is relatively easy to
-- do.
--
-- Todo: investigate what standards and conventions there are and see how they map
-- onto this mechanism. I've learned that users can come up with any demand so
-- nothing here is frozen.
--
-- Todo: I ran into the Unicode Collation document and noticed that there are some
-- similarities (like the weights) but using that method would still demand extra
-- code for language specifics. One option is to use the allkeys.txt file for the uc
-- vectors but then we would also use the collapsed key (sq, code is now commented).
-- In fact, we could just hook those into the replacer code that we reun beforehand.
--
-- In the future index entries will become more clever, i.e. they will have language
-- etc properties that then can be used.

local gsub, find, rep, sub, sort, concat, tohash, format = string.gsub, string.find, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
local utfbyte, utfchar, utfcharacters = utf.byte, utf.char, utf.characters
local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
local P, Cs, R, S, lpegmatch, lpegpatterns = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match, lpeg.patterns

local allocate          = utilities.storage.allocate
local setmetatableindex = table.setmetatableindex

local trace_tests       = false  trackers.register("sorters.tests",        function(v) trace_tests        = v end)
local trace_methods     = false  trackers.register("sorters.methods",      function(v) trace_methods      = v end)
local trace_orders      = false  trackers.register("sorters.orders",       function(v) trace_orders       = v end)
local trace_replacements= false  trackers.register("sorters.replacements", function(v) trace_replacements = v end)

local report_sorters    = logs.reporter("languages","sorters")

local comparers         = { }
local splitters         = { }
local definitions       = allocate()
local tracers           = allocate()
local ignoredoffset     = 0x10000 -- frozen
local replacementoffset = 0x10000 -- frozen
local digitsoffset      = 0x20000 -- frozen
local digitsmaximum     = 0xFFFFF -- frozen

local lccodes           = characters.lccodes
local uccodes           = characters.uccodes
local lcchars           = characters.lcchars
local ucchars           = characters.ucchars
local shchars           = characters.shchars
local fscodes           = characters.fscodes
local fschars           = characters.fschars

local decomposed        = characters.decomposed

local variables         = interfaces.variables

local v_numbers         = variables.numbers
local v_default         = variables.default
local v_before          = variables.before
local v_after           = variables.after
local v_first           = variables.first
local v_last            = variables.last

local validmethods      = tohash {
    "ch", -- raw character (for tracing)
    "mm", -- minus mapping
    "zm", -- zero  mapping
    "pm", -- plus  mapping
    "mc", -- lower case - 1
    "zc", -- lower case
    "pc", -- lower case + 1
    "uc", -- unicode
}

local predefinedmethods = {
    [v_default] = "zc,pc,zm,pm,uc",
    [v_before]  = "mm,mc,uc",
    [v_after]   = "pm,mc,uc",
    [v_first]   = "pc,mm,uc",
    [v_last]    = "mc,mm,uc",
}

sorters = {
    comparers    = comparers,
    splitters    = splitters,
    definitions  = definitions,
    tracers      = tracers,
    constants    = {
        ignoredoffset     = ignoredoffset,
        replacementoffset = replacementoffset,
        digitsoffset      = digitsoffset,
        digitsmaximum     = digitsmaximum,
        defaultlanguage   = v_default,
        defaultmethod     = v_default,
        defaultdigits     = v_numbers,
        validmethods      = validmethods,
    }
}

local sorters   = sorters
local constants = sorters.constants

local data, language, method, digits
local replacements, m_mappings, z_mappings, p_mappings, entries, orders, lower, upper, method, sequence, usedinsequence
local thefirstofsplit

local mte = { -- todo: assign to t
    __index = function(t,k)
        if k and k ~= "" and utfbyte(k) < digitsoffset then -- k check really needed (see s-lan-02)
            local el
            if k then
                local l = lower[k] or lcchars[k]
                el = rawget(t,l)
            end
            if not el then
                local l = shchars[k]
                if l and l ~= k then
                    if #l > 1 then
                        l = sub(l,1,1) -- todo
                    end
                    el = rawget(t,l)
                    if not el then
                        l = lower[k] or lcchars[l]
                        if l then
                            el = rawget(t,l)
                        end
                    end
                end
                el = el or k
            end
        --  rawset(t,k,el)
            return el
        else
        --  rawset(t,k,k)
        end
    end
}

local noorder = false
local nothing = { 0 }

local function preparetables(data)
    local orders, lower, m_mappings, z_mappings, p_mappings = data.orders, data.lower, { }, { }, { }
    for i=1,#orders do
        local oi = orders[i]
        local n = { 2 * i }
        m_mappings[oi], z_mappings[oi], p_mappings[oi] = n, n, n
    end
    local mtm = {
        __index = function(t,k)
            local n, nn
            if k then
                if trace_orders then
                    report_sorters("simplifing character %C",k)
                end
                local l = lower[k] or lcchars[k]
                if l then
                    if trace_orders then
                        report_sorters(" 1 lower: %C",l)
                    end
                    local ml = rawget(t,l)
                    if ml then
                        n = { }
                        nn = 0
                        for i=1,#ml do
                            nn = nn + 1
                            n[nn] = ml[i] + (t.__delta or 0)
                        end
                        if trace_orders then
                            report_sorters(" 2 order: % t",n)
                        end
                    end
                end
                if not n then
                    local s = shchars[k] -- maybe all components?
                    if s and s ~= k then
                        if trace_orders then
                            report_sorters(" 3 shape: %C",s)
                        end
                        n = { }
                        nn = 0
                        for l in utfcharacters(s) do
                            local ml = rawget(t,l)
                            if ml then
                                if trace_orders then
                                    report_sorters(" 4 keep: %C",l)
                                end
                                if ml then
                                    for i=1,#ml do
                                        nn = nn + 1
                                        n[nn] = ml[i]
                                    end
                                end
                            else
                                local l = lower[l] or lcchars[l]
                                if l then
                                    if trace_orders then
                                        report_sorters(" 5 lower: %C",l)
                                    end
                                    local ml = rawget(t,l)
                                    if ml then
                                        for i=1,#ml do
                                            nn = nn + 1
                                            n[nn] = ml[i] + (t.__delta or 0)
                                        end
                                    end
                                end
                            end
                        end
                    else
                        -- this is a kind of last resort branch that we might want to revise
                        -- one day
                        --
                        -- local b = utfbyte(k)
                        -- n = decomposed[b] or { b }
                        -- if trace_tests then
                        --     report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                        -- end
                        --
                        -- we need to move way above valid order (new per 2014-10-16) .. maybe we
                        -- need to move it even more up to get numbers right (not all have orders)
                        --
                        if k == "\000" then
                            n = nothing -- shared
                            if trace_orders then
                                report_sorters(" 6 split: space") -- todo
                            end
                        else
                            local b = 2 * #orders + utfbyte(k)
                            n = decomposed[b] or { b } -- could be shared tables
                            if trace_orders then
                                report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                            end
                        end
                    end
                    if n then
                        if trace_orders then
                            report_sorters(" 7 order: % t",n)
                        end
                    else
                        n = noorder
                        if trace_orders then
                            report_sorters(" 8 order: 0")
                        end
                    end
                end
            else
                n = noorder
                if trace_orders then
                    report_sorters(" 9 order: 0")
                end
            end
            rawset(t,k,n)
            return n
        end
    }
    data.m_mappings = m_mappings
    data.z_mappings = z_mappings
    data.p_mappings = p_mappings
    m_mappings.__delta = -1
    z_mappings.__delta =  0
    p_mappings.__delta =  1
    setmetatable(data.entries,mte)
    setmetatable(data.m_mappings,mtm)
    setmetatable(data.z_mappings,mtm)
    setmetatable(data.p_mappings,mtm)
    thefirstofsplit = data.firstofsplit
end

local function update() -- prepare parent chains, needed when new languages are added
    for language, data in next, definitions do
        local parent = data.parent or "default"
        if language ~= "default" then
            setmetatableindex(data,definitions[parent] or definitions.default)
        end
        data.language   = language
        data.parent     = parent
        data.m_mappings = { } -- free temp data
        data.z_mappings = { } -- free temp data
        data.p_mappings = { } -- free temp data
    end
end

local function setlanguage(l,m,d,u) -- this will become a specification table (also keep this one as it's used in manuals)
    language = (l ~= "" and l) or constants.defaultlanguage
    data     = definitions[language or constants.defaultlanguage] or definitions[constants.defaultlanguage]
    if not data then
        report_sorters("unknown language %a",language)
        data = definitions.en
    end
    method   = (m ~= "" and m) or (data.method ~= "" and data.method) or constants.defaultmethod
    digits   = (d ~= "" and d) or (data.digits ~= "" and data.digits) or constants.defaultdigits
    if trace_tests then
        report_sorters("setting language %a, method %a, digits %a",language,method,digits)
    end
    replacements = data.replacements
    entries      = data.entries
    orders       = data.orders
    lower        = data.lower
    upper        = data.upper
    preparetables(data)
    m_mappings   = data.m_mappings
    z_mappings   = data.z_mappings
    p_mappings   = data.p_mappings
    --
    method = predefinedmethods[variables[method]] or method
    data.method  = method
    --
    data.digits  = digits
    --
    local seq = utilities.parsers.settings_to_array(method or "") -- check the list
    sequence = { }
    local nofsequence = 0
    for i=1,#seq do
        local s = seq[i]
        if validmethods[s] then
            nofsequence = nofsequence + 1
            sequence[nofsequence] = s
        else
            report_sorters("invalid sorter method %a in %a",s,method)
        end
    end
    usedinsequence = tohash(sequence)
    data.sequence = sequence
    data.usedinsequence = usedinsequence
-- usedinsequence.ch = true -- better just store the string
    if trace_tests then
        report_sorters("using sort sequence: % t",sequence)
    end
    --
    return data
end

function sorters.update()
    update()
    setlanguage(language,method,numberorder) -- resync current language and method
end

function sorters.setlanguage(language,method,numberorder)
    update()
    setlanguage(language,method,numberorder) -- new language and method
end

-- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them

-- inlining and checking first slot first doesn't speed up (the 400K complex author sort)

local function basicsort(sort_a,sort_b)
    if sort_a and sort_b then
        local na = #sort_a
        local nb = #sort_b
        if na > nb then
            na = nb
        end
        if na > 0 then
            for i=1,na do
                local ai, bi = sort_a[i], sort_b[i]
                if ai > bi then
                    return  1
                elseif ai < bi then
                    return -1
                end
            end
        end
    end
    return 0
end

-- todo: compile compare function

local function basic(a,b) -- trace ea and eb
    if a == b then
        -- hashed (shared) entries
        return 0
    end
    local ea = a.split
    local eb = b.split
    local na = #ea
    local nb = #eb
    if na == 0 and nb == 0 then
        -- simple variant (single word)
        local result = 0
        for j=1,#sequence do
            local m = sequence[j]
            result = basicsort(ea[m],eb[m])
            if result ~= 0 then
                return result
            end
        end
        if result == 0 then
            local la = #ea.uc
            local lb = #eb.uc
            if la > lb then
                return 1
            elseif lb > la then
                return -1
            else
                return 0
            end
        else
            return result
        end
    else
        -- complex variant, used in register (multiple words)
        local result = 0
        for i=1,nb < na and nb or na do
            local eai = ea[i]
            local ebi = eb[i]
            for j=1,#sequence do
                local m = sequence[j]
                result = basicsort(eai[m],ebi[m])
                if result ~= 0 then
                    return result
                end
            end
            if result == 0 then
                local la = #eai.uc
                local lb = #ebi.uc
                if la > lb then
                    return 1
                elseif lb > la then
                    return -1
                end
            else
                return result
            end
        end
        if result ~= 0 then
            return result
        elseif na > nb then
            return 1
        elseif nb > na then
            return -1
        else
            return 0
        end
    end
end

-- if we use sq:
--
-- local function basic(a,b) -- trace ea and eb
--     local ea, eb = a.split, b.split
--     local na, nb = #ea, #eb
--     if na == 0 and nb == 0 then
--         -- simple variant (single word)
--         return basicsort(ea.sq,eb.sq)
--     else
--         -- complex variant, used in register (multiple words)
--         local result = 0
--         for i=1,nb < na and nb or na do
--             local eai, ebi = ea[i], eb[i]
--             result = basicsort(ea.sq,eb.sq)
--             if result ~= 0 then
--                 return result
--             end
--         end
--         if result ~= 0 then
--             return result
--         elseif na > nb then
--             return 1
--         elseif nb > na then
--             return -1
--         else
--             return 0
--         end
--     end
-- end

comparers.basic = basic

function sorters.basicsorter(a,b)
    return basic(a,b) == -1
end

local function numify(old)
    if digits == v_numbers then -- was swapped, fixed 2014-11-10
        local new = digitsoffset + tonumber(old) -- alternatively we can create range
        if new > digitsmaximum then
            new = digitsmaximum
        end
        return utfchar(new)
    else
        return old
    end
end

local pattern = nil

local function prepare() -- todo: test \Ux{hex}
    pattern = Cs( (
        characters.tex.toutfpattern()
      + lpeg.patterns.whitespace / "\000"
      + (P("\\Ux{") / "" * ((1-P("}"))^1/function(s) return utfchar(tonumber(s,16)) end) * (P("}")/""))
      + (P("\\") / "") * R("AZ")^0 * (P(-1) + #(1-R("AZ")))
      + (P("\\") * P(1) * R("az","AZ")^0) / ""
      + S("[](){}$\"'") / ""
      + R("09")^1 / numify
      + P(1)
    )^0 )
    return pattern
end

local function strip(str) -- todo: only letters and such
    if str and str ~= "" then
        return lpegmatch(pattern or prepare(),str)
    else
        return ""
    end
end

sorters.strip = strip

local function firstofsplit(entry)
    -- numbers are left padded by spaces
    local split = entry.split
    if #split > 0 then
        split = split[1].ch
    else
        split = split.ch
    end
    local first = split and split[1] or ""
    if thefirstofsplit then
        return thefirstofsplit(first,data,entry) -- normally the first one is needed
    else
        return first, entries[first] or "\000" -- tag
    end
end

sorters.firstofsplit = firstofsplit

-- for the moment we use an inefficient bunch of tables but once
-- we know what combinations make sense we can optimize this

function splitters.utf(str,checked) -- we could append m and u but this is cleaner, s is for tracing
    local nofreplacements = #replacements
    if nofreplacements > 0 then
        -- todo make an lpeg for this
        local replacer = replacements.replacer
        if not replacer then
            local rep = { }
            for i=1,nofreplacements do
                local r = replacements[i]
                rep[strip(r[1])] = strip(r[2])
            end
            replacer = lpeg.utfchartabletopattern(rep)
            replacer = Cs((replacer/rep + lpegpatterns.utf8character)^0)
            replacements.replacer = replacer
        end
        local rep = lpegmatch(replacer,str)
        if rep and rep ~= str then
            if trace_replacements then
                report_sorters("original   : %s",str)
                report_sorters("replacement: %s",rep)
            end
            str = rep
        end
     -- for k=1,#replacements do
     --     local v = replacements[k]
     --     local s = v[1]
     --     if find(str,s) then
     --         str = gsub(str,s,v[2])
     --     end
     -- end
    end
    local m_case    = { }
    local z_case    = { }
    local p_case    = { }
    local m_mapping = { }
    local z_mapping = { }
    local p_mapping = { }
    local char      = { }
    local byte      = { }
    local n         = 0
    local nm        = 0
    local nz        = 0
    local np        = 0
    for sc in utfcharacters(str) do
        local b = utfbyte(sc)
        if b >= digitsoffset then
            if n == 0 then
                -- we need to force number to the top
                z_case[1] = 0
                m_case[1] = 0
                p_case[1] = 0
                char[1] = sc
                byte[1] = 0
                m_mapping[1] = 0
                z_mapping[1] = 0
                p_mapping[1] = 0
                n = 2
            else
                n = n + 1
            end
            z_case[n] = b
            m_case[n] = b
            p_case[n] = b
            char[n] = sc
            byte[n] = b
            nm = nm + 1
            nz = nz + 1
            np = np + 1
            m_mapping[nm] = b
            z_mapping[nz] = b
            p_mapping[np] = b
        else
            n = n + 1
            local l = lower[sc]
            l = l and utfbyte(l) or lccodes[b] or b
         -- local u = upper[sc]
         -- u = u and utfbyte(u) or uccodes[b] or b
            if type(l) == "table" then
                l = l[1] -- there are currently no tables in lccodes but it can be some, day
            end
         -- if type(u) == "table" then
         --     u = u[1] -- there are currently no tables in lccodes but it can be some, day
         -- end
            z_case[n] = l
            if l ~= b then
                m_case[n] = l - 1
                p_case[n] = l + 1
            else
                m_case[n] = l
                p_case[n] = l
            end
            char[n], byte[n] = sc, b
            local fs = fscodes[b] or b
            local msc = m_mappings[sc]
            if msc ~= noorder then
                if not msc then
                    msc = m_mappings[fs]
                end
                for i=1,#msc do
                    nm = nm + 1
                    m_mapping[nm] = msc[i]
                end
            end
            local zsc = z_mappings[sc]
            if zsc ~= noorder then
                if not zsc then
                    zsc = z_mappings[fs]
                end
                for i=1,#zsc do
                    nz = nz + 1
                    z_mapping[nz] = zsc[i]
                end
            end
            local psc = p_mappings[sc]
            if psc ~= noorder then
                if not psc then
                    psc = p_mappings[fs]
                end
                for i=1,#psc do
                    np = np + 1
                    p_mapping[np] = psc[i]
                end
            end
        end
    end
    -- -- only those needed that are part of a sequence
    --
    -- local b = byte[1]
    -- if b then
    --     -- we set them to the first split code (korean)
    --     local fs = fscodes[b] or b
    --     if #m_mapping == 0 then
    --         m_mapping = { m_mappings[fs][1] }
    --     end
    --     if #z_mapping == 0 then
    --         z_mapping = { z_mappings[fs][1] }
    --     end
    --     if #p_mapping == 0 then
    --         p_mapping = { p_mappings[fs][1] }
    --     end
    -- end
    local result
    if checked then
        result = {
            ch = trace_tests       and char      or nil, -- not in sequence
            uc = usedinsequence.uc and byte      or nil,
            mc = usedinsequence.mc and m_case    or nil,
            zc = usedinsequence.zc and z_case    or nil,
            pc = usedinsequence.pc and p_case    or nil,
            mm = usedinsequence.mm and m_mapping or nil,
            zm = usedinsequence.zm and z_mapping or nil,
            pm = usedinsequence.pm and p_mapping or nil,
        }
    else
        result = {
            ch = char,
            uc = byte,
            mc = m_case,
            zc = z_case,
            pc = p_case,
            mm = m_mapping,
            zm = z_mapping,
            pm = p_mapping,
        }
    end
 -- local sq, n = { }, 0
 -- for i=1,#byte do
 --     for s=1,#sequence do
 --         n = n + 1
 --         sq[n] = result[sequence[s]][i]
 --     end
 -- end
 -- result.sq = sq
    return result
end

local function packch(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local t = { }
        for i=1,#split do
            local tt = { }
            local ch = split[i].ch
            for j=1,#ch do
                local chr = ch[j]
                local byt = utfbyte(chr)
                if byt > ignoredoffset then
                    tt[j] = "[]"
                elseif byt == 0 then
                    tt[j] = " "
                else
                    tt[j] = chr
                end
            end
            t[i] = concat(tt)
        end
        return concat(t," + ")
    else
        local t  = { }
        local ch = (split and split.ch) or entry.ch or entry
        if ch then
            for i=1,#ch do
                local chr = ch[i]
                local byt = utfbyte(chr)
                if byt > ignoredoffset then
                    t[i] = "[]"
                elseif byt == 0 then
                    t[i] = " "
                else
                    t[i] = chr
                end
            end
            return concat(t)
        else
            return ""
        end
    end
end

local function packuc(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local t = { }
        for i=1,#split do
            t[i] = concat(split[i].uc, " ") -- sq
        end
        return concat(t," + ")
    else
        local uc = (split and split.uc) or entry.uc or entry
        if uc then
            return concat(uc," ") -- sq
        else
            return ""
        end
    end
end

sorters.packch = packch
sorters.packuc = packuc

function sorters.sort(entries,cmp)
    if trace_methods then
        local nofentries = #entries
        report_sorters("entries: %s, language: %s, method: %s, digits: %s",nofentries,language,method,tostring(digits))
        for i=1,nofentries do
            report_sorters("entry %s",table.serialize(entries[i].split,i,true,true,true))
        end
    end
    if trace_tests then
        sort(entries,function(a,b)
            local r = cmp(a,b)
            local e = (not r and "?") or (r<0 and "<") or (r>0 and ">") or "="
            report_sorters("%s %s %s | %s %s %s",packch(a),e,packch(b),packuc(a),e,packuc(b))
            return r == -1
        end)
        local s
        for i=1,#entries do
            local entry = entries[i]
            local letter, first = firstofsplit(entry)
            if first == s then
                first = "  "
            else
                s = first
                if first and letter then
                    report_sorters(">> %C (%C)",first,letter)
                end
            end
            report_sorters("   %s | %s",packch(entry),packuc(entry))
        end
    else
        sort(entries,function(a,b)
            return cmp(a,b) == -1
        end)
    end
end

-- helper

function sorters.replacementlist(list)
    local replacements = { }
    for i=1,#list do
        replacements[i] = {
            list[i],
            utfchar(replacementoffset+i),
        }
    end
    return replacements
end