-- モジュール:ain-kana-conv

-- 出典: フリー多機能辞典『ウィクショナリー日本語版(Wiktionary)』
-- ナビゲーションに移動 検索に移動

-- このモジュールについての説明文ページを モジュール:ain-kana-conv/doc に作成できます

local export = {}

-- Romanized letters that count as a syllable onset. do_convert checks the
-- character in front of each vowel against this list (also after
-- lowercasing it) to decide whether it belongs to the same syllable.
-- "'" is listed as a consonant; CONVERSION_TABLE maps "'a" etc. to the same
-- kana as the bare vowel.
local CONSONANTS = {
    "p", "t", "c", "k",
    "m", "n", "s", "h",
    "w", "r", "y", "'"
}
-- Characters that do_convert treats as a syllable nucleus: the five plain
-- vowels and their acute-accented forms.
local VOWELS = {
    "a", "i", "u", "e", "o",
    "á", "í", "ú", "é", "ó",
}
-- Accented vowel -> plain vowel. convert_syllable uses this to strip the
-- accent before the CONVERSION_TABLE lookup (the accent is then rendered as
-- an overline on the output).
local ACCENT_CONVERSION_TABLE = {
    ["á"] = "a", ["í"] = "i", ["ú"] = "u", ["é"] = "e", ["ó"] = "o",
}

-- Romanized onset+vowel (or bare vowel) -> katakana.
-- convert_syllable looks a syllable up here first as written and then
-- lowercased, so the uppercase keys ("CA", "CU", "CE", "CO") select a
-- different spelling from their lowercase counterparts.
-- Several values are a base kana followed by a combining handakuten
-- (e.g. "ト゚", "サ゚"); keep those byte sequences intact when editing.
-- "nn" is read by convert_syllable for a coda "n" that is followed by
-- another character. "tt" is not referenced by name in this module
-- NOTE(review): confirm whether it is still needed.
local CONVERSION_TABLE = {
    [ "a"] = "ア",  [ "i"] = "イ", [ "u"] = "ウ", [ "e"] = "エ", [ "o"] = "オ",
    ["'a"] = "ア",  ["'i"] = "イ", ["'u"] = "ウ", ["'e"] = "エ", ["'o"] = "オ",
    ["ka"] = "カ",  ["ki"] = "キ", ["ku"] = "ク", ["ke"] = "ケ", ["ko"] = "コ",
    ["sa"] = "サ",  ["si"] = "シ", ["su"] = "ス", ["se"] = "セ", ["so"] = "ソ",
    ["ta"] = "タ",                 ["tu"] = "ト゚", ["te"] = "テ", ["to"] = "ト",
    ["ca"] = "チャ", ["ci"] = "チ", ["cu"] = "チュ", ["ce"] = "チェ", ["co"] = "チョ",
    ["CA"] = "サ゚",               ["CU"] = "ス゚", ["CE"] = "セ゚", ["CO"] = "ソ゚",
    ["na"] = "ナ",  ["ni"] = "ニ", ["nu"] = "ヌ", ["ne"] = "ネ", ["no"] = "ノ",
    ["ha"] = "ハ",  ["hi"] = "ヒ", ["hu"] = "フ", ["he"] = "ヘ", ["ho"] = "ホ",
    ["pa"] = "パ",  ["pi"] = "ピ", ["pu"] = "プ", ["pe"] = "ペ", ["po"] = "ポ",
    ["ma"] = "マ",  ["mi"] = "ミ", ["mu"] = "ム", ["me"] = "メ", ["mo"] = "モ",
    ["ya"] = "ヤ",  ["yi"] = "イ", ["yu"] = "ユ", ["ye"] = "イェ", ["yo"] = "ヨ",
    ["ra"] = "ラ",  ["ri"] = "リ", ["ru"] = "ル", ["re"] = "レ", ["ro"] = "ロ",
    ["wa"] = "ワ",  ["wi"] = "ヰ",                ["we"] = "ヱ", ["wo"] = "ヲ",
    ["nn"] = "ン",  ["tt"] = "ッ"
}

-- Syllable-final consonant -> kana, for codas whose spelling does not depend
-- on the preceding vowel. Keys are case-sensitive: "N", "S" and "T" select a
-- different kana from "n", "s" and "t".
local CODA_CONS = {
    ["w"] = "ゥ", ["y"] = "ィ",
    ["m"] = "ㇺ", ["n"] = "ㇴ", ["N"] = "𛅧",
    ["s"] = "ㇱ",["S"] = "ㇲ",
    ["p"] = "ㇷ゚", ["t"] = "ッ", ["T"] = "ㇳ", ["k"] = "ㇰ"
}

-- Syllable-final consonants whose kana depends on the preceding vowel:
-- CODA_VARA[coda][vowel] gives the small kana to append.
-- "x" maps to the same kana as "h".
local CODA_VARA = {
    ["r"] = {
        ["a"] = "ㇻ", ["i"] = "ㇼ", ["u"] = "ㇽ", ["e"] = "ㇾ", ["o"] = "ㇿ"
    },
    ["h"] = {
        ["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
    },
    ["x"] = {
        ["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
    }
}

-- Alternative spellings: katakana substring -> list of replacements.
-- generate_variants collects the keys that occur in a converted string and
-- apply_variants substitutes each of them (via gsub, so every occurrence of
-- the key is replaced at once) to enumerate the spelling variants.
-- The keys are used as Lua patterns, so a new key must not contain pattern
-- magic characters.
local VARIANT_TABLE = {
    -- tu
    ["ト゚"] = { "ツ゚", "トゥ" },
    -- coda -n
    ["ㇴ"] = { "ン" },
    -- wi / we / wo written as ウ + small vowel
    ["ヱ"] = { "ウェ" },
    ["ヰ"] = { "ウィ" },
    ["ヲ"] = { "ウォ" },
    -- pp -> tp (coda p before a p-row kana written with ッ)
    ["ㇷ゚パ"] = { "ッパ" },
    ["ㇷ゚ピ"] = { "ッピ" },
    ["ㇷ゚ペ"] = { "ッペ" },
    ["ㇷ゚プ"] = { "ップ" },
    ["ㇷ゚ポ"] = { "ッポ" },
    -- kk -> tk (coda k before a k-row kana written with ッ)
    ["ㇰカ"] = { "ッカ" },
    ["ㇰキ"] = { "ッキ" },
    ["ㇰケ"] = { "ッケ" },
    ["ㇰク"] = { "ック" },
    ["ㇰコ"] = { "ッコ" },
    -- coda -y written as full-size イ
    ["ィ"] = { "イ" },
    -- coda -w written as full-size ウ
    ["ゥ"] = { "ウ" },
    -- coda -r after a/i/e/o written with the u-coloured small ㇽ
    ["ㇻ"] = { "ㇽ" },
    ["ㇼ"] = { "ㇽ" },
    ["ㇾ"] = { "ㇽ" },
    ["ㇿ"] = { "ㇽ" },
}

--- Report whether `item` occurs among the values of `items`.
-- Declared `local` so the helper does not leak into the global environment
-- shared with other modules.
---@param item any value to look for (compared with ==)
---@param items table table whose values are searched (array or hash part)
---@return boolean true when at least one value equals `item`
local function in_values(item, items)
    for _, v in pairs(items) do
        if v == item then
            return true
        end
    end
    return false
end

--- Report whether `item` is a key of `items`.
-- Declared `local` so the helper does not leak into the global environment
-- shared with other modules. Uses a direct index instead of scanning every
-- key; a key is present exactly when its value is not nil.
---@param item any candidate key
---@param items table table to look in
---@return boolean true when `items[item]` holds a non-nil value
local function in_keys(item, items)
    return items[item] ~= nil
end


--- Convert one romanized syllable to katakana.
--
-- The last character is split off as a coda when it is a key of CODA_CONS or
-- CODA_VARA; what is left (onset + nucleus) is looked up in CONVERSION_TABLE,
-- first as written and then lowercased. An acute accent on the nucleus is
-- stripped before the lookup and rendered as an overline on the result.
--
-- Raises an error when no katakana is known for the onset+nucleus, or when a
-- vowel-dependent coda (r/h/x) does not follow one of the five vowels.
---@param syllable string romanized syllable, e.g. "pir" or "ká"
---@param next_char string|nil character following the syllable in the word; nil or "" at the end of the word
---@return string katakana, wrapped in an overline <u> element when accented
local function convert_syllable(syllable, next_char)
    local l_syllable = mw.ustring.len(syllable)

    local remains = syllable
    local coda = ""

    local last_char = mw.ustring.sub(syllable, l_syllable, l_syllable)

    if CODA_CONS[last_char] ~= nil then
        remains = mw.ustring.sub(syllable, 1, l_syllable - 1)
        coda = CODA_CONS[last_char]
        -- ruunpe: ルウㇴペ or ルウンペ? Tentatively use the latter, i.e. a
        -- word-internal coda "n" is written with the full-size ン.
        if last_char == "n" and (next_char ~= nil and next_char ~= "") then
            coda = CONVERSION_TABLE['nn']
        end
    elseif CODA_VARA[last_char] ~= nil then
        remains = mw.ustring.sub(syllable, 1, l_syllable - 1)
        -- The coda kana echoes the preceding vowel. Normalize that vowel
        -- (case and accent) before the lookup: CODA_VARA only has plain
        -- lowercase vowel keys, so an accented nucleus such as "kár" used to
        -- produce a nil coda and crash on concatenation below.
        local echo_vowel = mw.ustring.lower(mw.ustring.sub(syllable, l_syllable - 1, l_syllable - 1))
        echo_vowel = ACCENT_CONVERSION_TABLE[echo_vowel] or echo_vowel
        coda = CODA_VARA[last_char][echo_vowel]
        if coda == nil then
            error("cannot find katakana for coda: ‘" .. syllable .. "’")
        end
    end

    -- Strip an acute accent from the nucleus, remembering that it was there.
    local accented_flag = false
    local l_remains = mw.ustring.len(remains)
    local nucleus = mw.ustring.sub(remains, l_remains, l_remains)
    local plain_nucleus = ACCENT_CONVERSION_TABLE[nucleus]
    if plain_nucleus ~= nil then
        accented_flag = true
        remains = mw.ustring.sub(remains, 1, l_remains - 1) .. plain_nucleus
    end

    -- Exact spelling first (uppercase keys are distinct entries), then the
    -- lowercased spelling.
    local kana = CONVERSION_TABLE[remains] or CONVERSION_TABLE[mw.ustring.lower(remains)]
    if kana == nil then
        error("cannot find katakana for CV pair: ‘" .. remains .. "’")
    end

    local converted = kana .. coda

    if accented_flag then
        converted = "<u style='text-decoration:overline;'>" .. converted .. "</u>"
    end
    return converted
end

--- Enumerate every spelling obtained by optionally replacing each variant key.
--
-- Starting from `result`, the keys `variant_keys[index]` .. `variant_keys[#variant_keys]`
-- are processed in order. For each key every string collected so far is kept
-- as is and additionally emitted once per replacement listed in VARIANT_TABLE
-- (all occurrences of the key replaced at once). The returned list may
-- contain duplicates; the unchanged input always comes first.
---@param result string
---@param variant_keys string[]
---@param index number
local function apply_variants(result, variant_keys, index)
    local expanded = { result }

    for key_pos = index, #variant_keys do
        local original = variant_keys[key_pos]
        local variations = VARIANT_TABLE[original]
        local next_round = {}

        for _, current in ipairs(expanded) do
            -- Unchanged spelling first, then one entry per replacement.
            next_round[#next_round + 1] = current
            for _, variation in ipairs(variations) do
                -- Parentheses drop gsub's second return value (the count).
                next_round[#next_round + 1] = (current:gsub(original, variation))
            end
        end

        expanded = next_round
    end

    return expanded
end

--- List `target` together with all of its alternative katakana spellings.
--
-- Only VARIANT_TABLE keys that occur in `target` are considered. The keys are
-- gathered with pairs(), so their relative order is whatever the table
-- traversal yields.
---@param target string
local function generate_variants(target)
    local present_keys = {}

    for katakana in pairs(VARIANT_TABLE) do
        if target:find(katakana) ~= nil then
            present_keys[#present_keys + 1] = katakana
        end
    end

    return apply_variants(target, present_keys, 1)
end

--- Convert one romanized word to katakana.
--
-- Steps:
--   1. Input with no letter, apostrophe or ignorable symbol at all (e.g. text
--      already in katakana) is returned unchanged.
--   2. "-", "=" and "." are removed.
--   3. Every vowel starts a syllable; a consonant directly in front of it is
--      its onset. Remaining characters become the coda of the syllable before
--      them.
--   4. Each syllable is converted with convert_syllable and the pieces are
--      concatenated.
--
-- Vowels are recognized case-insensitively, matching how onsets are already
-- recognized. Previously an uppercase vowel was never treated as a nucleus,
-- which made the uppercase CONVERSION_TABLE keys ("CA", "CU", ...)
-- unreachable and made any capitalized vowel raise an error.
--
-- Design notes carried over from the original author:
--   * Extensibility has to be kept in mind.
--   * "N" is a symbol for a distinct n-coda kana.
--   * Proper nouns such as Pawci-Kamuy are capitalized.
--   * Accent marks and other special notation: %u and %l cover them; %a is
--     unusable because it also matches hiragana and katakana.
--   * TODO: hotne = ホッネ or ホㇳネ?
--   * TODO: wan e-tu = ワㇴ エト゚ or ワネト゚?
--   * TODO: iyayiraykere = イヤィイラィケレ or イヤイラィケレ? Tentatively the
--     latter is produced.
---@param temp string one word in romanized Ainu (or arbitrary other text)
---@return string katakana, or `temp` itself when it does not look romanized
local function do_convert(temp)
    local ignore_chars = "%-=."
    local valid_pattern = "[%u%l'" .. ignore_chars .. "]+"

    -- E.g. {{ain-kana|hoy'oy|ヒオイオイ}}: the katakana argument is passed
    -- through untouched. Checking only whether the first character is a
    -- CONVERSION_TABLE value proved too hasty, hence the pattern test.
    -- NOTE(review): the pattern is not anchored, so text that merely contains
    -- one letter still goes on to conversion (and errors there) — confirm
    -- that this is intended.
    if not mw.ustring.match(temp, valid_pattern) then
        return temp
    end

    -- Normalize: drop "-", ".", "=". The text is deliberately not lowercased
    -- here because uppercase letters such as "N" carry meaning.
    temp = mw.ustring.gsub(temp, "[" .. ignore_chars .. "]", "")
    local length = mw.ustring.len(temp)

    -- Pass 1: number the syllables. group_ids[pos] is the syllable number of
    -- the character at position pos; only nuclei and onsets are set here.
    local group_ids = {}
    local syllable_count = 1
    local prev_char = ""
    local pos = 1
    for char in mw.ustring.gmatch(temp, ".") do
        if in_values(mw.ustring.lower(char), VOWELS) then
            -- CONSONANTS holds only lowercase letters and "'", so testing the
            -- lowercased character covers both cases.
            if in_values(mw.ustring.lower(prev_char), CONSONANTS) then
                group_ids[pos - 1] = syllable_count
            end
            group_ids[pos] = syllable_count
            syllable_count = syllable_count + 1
        end
        prev_char = char
        pos = pos + 1
    end

    -- Pass 2: every still-unassigned character is a coda and joins the
    -- syllable of the character before it. Characters in front of the first
    -- syllable stay unassigned (nil).
    for idx = 1, length do
        if group_ids[idx] == nil then
            group_ids[idx] = group_ids[idx - 1]
        end
    end

    -- Pass 3: cut the word wherever the syllable number changes and convert
    -- each piece. The character right after a piece is handed over so that
    -- convert_syllable can tell a word-internal coda from a word-final one.
    local pieces = {}
    local current_group_id = 1
    local head = 1
    for idx = 1, length do
        if group_ids[idx] ~= current_group_id then
            current_group_id = group_ids[idx]
            local content = mw.ustring.sub(temp, head, idx - 1)
            pieces[#pieces + 1] = convert_syllable(content, mw.ustring.sub(temp, idx, idx))
            head = idx
        end
    end
    -- Last syllable: no following character.
    pieces[#pieces + 1] = convert_syllable(mw.ustring.sub(temp, head, length))

    return table.concat(pieces)
end

-- local function valid_ainu_word(word)
--     -- TODO:
-- end


--- Debugging entry point: convert a single word given as a plain string
-- (not a frame) and return the katakana without generating variants.
---@param word string
---@return string
export.debug = function(word)
    local converted = do_convert(word)
    return converted
end

--- Shared implementation of the `convert` and `no_variants` entry points.
--
-- Reads the positional arguments of the calling template. Each argument is
-- split on spaces, every word is converted with do_convert and the words are
-- joined with spaces again. An empty or missing argument falls back to the
-- current page title. (Running frame:preprocess on the argument was tried
-- in the original code and is left disabled.)
---@param frame table Scribunto frame object of the #invoke
---@param with_variants boolean when true, spelling variants of every converted argument are listed as well
---@return string the distinct results in first-seen order, joined with "/"
local function convert_args(frame, with_variants)
    -- Get args
    local params = {
        [1] = { list = true, allow_holes = true }
    }
    local args = require("Module:parameters").process(frame:getParent().args, params)

    local items = {}
    local seen = {}
    local function add_unique(text)
        if not seen[text] then
            seen[text] = true
            items[#items + 1] = text
        end
    end

    -- Do conversion; loop at least once so that a call without arguments
    -- converts the page title.
    for i = 1, math.max(args[1].maxindex, 1) do
        local original_str = args[1][i]
        if not original_str or original_str == "" then
            original_str = mw.title.getCurrentTitle().text
        end

        local converted_words = {}
        for word in mw.text.gsplit(original_str, " ") do
            converted_words[#converted_words + 1] = do_convert(word)
        end
        local converted_str = table.concat(converted_words, " ")

        if with_variants then
            for _, variant in ipairs(generate_variants(converted_str)) do
                add_unique(variant)
            end
        else
            add_unique(converted_str)
        end
    end

    return table.concat(items, "/")
end

--- Template entry point: katakana for every argument, including all spelling
-- variants, separated by "/".
function export.convert(frame)
    return convert_args(frame, true)
end

--- Template entry point: katakana for every argument without spelling
-- variants, separated by "/".
function export.no_variants(frame)
    return convert_args(frame, false)
end

return export