モジュール:Tibt-translit
表示
このモジュールについての説明文ページを モジュール:Tibt-translit/doc に作成できます
local m_str_utils = require("モジュール:string utilities")
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local upper = m_str_utils.upper
local u = m_str_utils.char
local Tibt = require("モジュール:Tibt-common")
local export = {}
-- Two-code-point sequences (a base or subjoined letter plus a following
-- combining mark) and their Latin output. export.tr substitutes these in each
-- syllable before the per-character `oneChar` lookup, so the sequences are
-- consumed as a unit instead of being transliterated piecewise.
local twoChars = {
["རྀ"] = "ṛ", ["ྲྀ"] = "ṛ", -- Primarily used in Sanskrit(-derived) borrowings.
["ལྀ"] = "ḷ", ["ླྀ"] = "ḷ",
["ཕ༹"] = "f", ["བ༹"] = "v", -- Used to transliterate Chinese.
["ཁ༹"] = "x", ["ག༹"] = "ġ", -- Used in Balti.
-- Same four sequences as above, keyed by the subjoined form of the letter.
["ྥ༹"] = "f", ["ྦ༹"] = "v",
["ྑ༹"] = "x", ["ྒ༹"] = "ġ",
}
-- Single-character lookup table, used by export.tr as the replacement table of
-- a gsub over every character of a syllable (characters without an entry are
-- left unchanged).
--
-- Conventions that export.tr relies on:
--   * ཝ, ཡ and ར/ཪ map to ".w", ".y", ".r". export.tr strips the dot again
--     unless the letter follows a consonant and precedes a vowel, which keeps
--     e.g. "g.y" distinct from "gy" (the subjoined letters below map to the
--     undotted "w", "y", "r").
--   * ཨ maps to the placeholder "\1"; export.tr deletes it at the start of a
--     syllable and turns any other occurrence into ".".
local oneChar = {
["ཀ"] = "k", ["ཁ"] = "kh", ["ག"] = "g", ["ང"] = "ng",
["ཅ"] = "c", ["ཆ"] = "ch", ["ཇ"] = "j", ["ཉ"] = "ny",
["ཏ"] = "t", ["ཐ"] = "th", ["ད"] = "d", ["ན"] = "n",
["པ"] = "p", ["ཕ"] = "ph", ["བ"] = "b", ["མ"] = "m",
["ཙ"] = "ts", ["ཚ"] = "tsh", ["ཛ"] = "dz", ["ཝ"] = ".w",
["ཞ"] = "zh", ["ཟ"] = "z", ["འ"] = "'", ["ཡ"] = ".y",
["ར"] = ".r", ["ཪ"] = ".r", ["ལ"] = "l", ["ཤ"] = "sh", ["ས"] = "s",
["ཧ"] = "h", ["ཨ"] = "\1",
["ཊ"] = "ṭ", ["ཋ"] = "ṭh", ["ཌ"] = "ḍ", ["ཎ"] = "ṇ", ["ཥ"] = "ṣ",
["ཫ"] = "q", ["ཬ"] = "ṛ", -- Used in Balti.
["྅"] = "ʼ", ["ྈ"] = "x", ["ྉ"] = "f", ["ྌ"] = "f", -- Used in Sanskrit.
-- Use Unicode references for non-spacing characters to avoid making it unreadable when edited in a text file.
-- Vowel signs. U+0F71 becomes a combining macron (U+0304) that attaches to
-- the preceding Latin vowel.
[u(0x0F71)] = u(0x0304), [u(0x0F72)] = "i", [u(0x0F80)] = "ị", [u(0x0F74)] = "u", [u(0x0F7A)] = "e",
[u(0x0F7B)] = "ai", [u(0x0F7C)] = "o", [u(0x0F7D)] = "au",
[u(0x0F7E)] = "ṃ", [u(0x0F82)] = "ṃ", [u(0x0F83)] = "m̐", ["ཿ"] = "ḥ",
-- Subjoined consonants (U+0F90 onwards), same values as the base letters
-- above except that w/y/r carry no leading dot.
[u(0x0F90)] = "k", [u(0x0F91)] = "kh", [u(0x0F92)] = "g", [u(0x0F94)] = "ng",
[u(0x0F95)] = "c", [u(0x0F96)] = "ch", [u(0x0F97)] = "j", [u(0x0F99)] = "ny",
[u(0x0F9F)] = "t", [u(0x0FA0)] = "th", [u(0x0FA1)] = "d", [u(0x0FA3)] = "n",
[u(0x0FA4)] = "p", [u(0x0FA5)] = "ph", [u(0x0FA6)] = "b", [u(0x0FA8)] = "m",
[u(0x0FA9)] = "ts", [u(0x0FAA)] = "tsh", [u(0x0FAB)] = "dz", [u(0x0FAD)] = "w", [u(0x0FBA)] = "w",
[u(0x0FAE)] = "zh", [u(0x0FAF)] = "z", [u(0x0FB0)] = "'", [u(0x0FB1)] = "y", [u(0x0FBB)] = "y",
[u(0x0FB2)] = "r", [u(0x0FBC)] = "r", [u(0x0FB3)] = "l", [u(0x0FB4)] = "sh", [u(0x0FB6)] = "s",
[u(0x0FB7)] = "h", [u(0x0FB8)] = "+a",
[u(0x0F9A)] = "ṭ", [u(0x0F9B)] = "ṭh", [u(0x0F9C)] = "ḍ", [u(0x0F9E)] = "ṇ", [u(0x0FB5)] = "ṣ",
[u(0x0F8D)] = "x", [u(0x0F8E)] = "f", [u(0x0F8F)] = "f",
}
-- Digits, half-digits and punctuation. export.tr applies this table to the
-- whole text (one character at a time) after every syllable has been
-- transliterated.
local symbol = {
["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4",
["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9",
["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5",
["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5",
-- The syllable separator "་" becomes an ordinary space.
["་"] = " ", ["༌"] = "*", ["།"] = ".", ["༎"] = ". ¶", ["༏"] = ";",
["༑"] = "|", ["༈"] = "!", ["༔"] = ":", ["༼"] = "(", ["༽"] = ")",
["༺"] = "<", ["༻"] = ">",
-- A space already present in the input is parked as "\1" so that it can be
-- told apart from separator-derived spaces; export.tr turns it back into a
-- space as its last step.
[" "] = "\1"
}
-- Language codes that receive the East Bodish post-processing in export.tr.
local east_bodish = { ["xkz"] = true, ["kjz"] = true, ["xkf"] = true, ["dzl"] = true, ["dka"] = true }

-- Escapes every Lua pattern magic character in `s` so that it can be used as
-- a pattern that matches `s` literally.
local function escapePattern(s)
	return (gsub(s, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Escapes "%" in `s` so that it can be used as a gsub replacement string that
-- inserts `s` literally.
local function escapeReplacement(s)
	return (gsub(s, "%%", "%%%%"))
end

--- Transliterates Tibetan-script text into Latin script.
-- @param text  The text to transliterate.
-- @param lang  Language code (required). It is passed to
--              Tibt.findMainStack and selects the East Bodish-specific fixes.
-- @param sc    Script code; accepted for interface compatibility, not used.
-- @return The transliteration, NFC-normalised, followed by one trailing space.
-- Raises an error if `lang` is missing.
function export.tr(text, lang, sc)
	if not lang then
		error("言語コードが必要です")
	end

	text = gsub(text, "༒", "།")
	-- Drop syllable separators at the very end of the text.
	text = gsub(text, "[་༌]+$", "")

	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			local tr = syllable

			-- Move U+0F71 behind any vowel signs / subjoined r, l that follow it.
			tr = gsub(tr, "(ཱ)([ིེུ-ཽྀྲླ]+)", "%2%1")

			-- Main stack of the syllable with the final marks removed.
			local mainStack = gsub(Tibt.findMainStack(syllable, lang), "[ཾཿྂྃ]", "")

			-- If the main stack does not end in a vowel sign, give it an
			-- explicit "a" (placed before a trailing U+0F71 so that the macron
			-- lands on the "a").
			if match(mainStack, "([^ༀི-ཽྀ]ཱ?)$") then
				local newMainStack = mainStack .. "a"
				newMainStack = gsub(newMainStack, "ཱa$", "aཱ")
				-- Both arguments are data, not patterns: escape them so the
				-- substitution is always literal.
				tr = gsub(tr, escapePattern(mainStack), escapeReplacement(newMainStack), 1)
			end

			-- Emphasis marks become HTML wrappers around the whole syllable.
			tr = gsub(tr, "^(.*)༷(.*)$", "<u>%1%2</u>")
			tr = gsub(tr, "^(.*)༵(.*)$", "<span style=\"text-decoration-style:double;\">%1%2</span>")

			-- Two-character sequences first, then everything else one
			-- character at a time.
			for letter, replacement in pairs(twoChars) do
				tr = gsub(tr, letter, replacement)
			end
			tr = gsub(tr, ".", oneChar)

			-- Insert "a" after an apostrophe (preceded by any character) that is
			-- not followed by a vowel, "-" or "<".
			tr = gsub(tr, "(.')([^aāeiīoḷḹṛṝuū%-<])", "%1a%2")
			-- Remove the disambiguating dot at the start of the syllable or
			-- after a vowel, "-" or ">" ...
			tr = gsub(tr, "%f[^%zaāeiīoḷḹṛṝuū%->]%.", "")
			-- ... and before r/w/y that is not followed by a vowel.
			tr = gsub(tr, "%.([rwy][^aāeiīoḷḹṛṝuū])", "%1")
			-- The "\1" placeholder (from ཨ) vanishes syllable-initially and is
			-- written "." elsewhere.
			tr = gsub(tr, "^\1", "")
			tr = tr:gsub("\1", "%.")

			-- Put the result back in place of the first remaining occurrence
			-- of the syllable; again a literal, escaped substitution.
			text = gsub(text, escapePattern(syllable), escapeReplacement(tr), 1)
		end
	end

	-- U+0F84 (halanta) cancels the inherent vowel: drop it together with an
	-- "a" that directly follows it.
	text = gsub(text, u(0x0F84) .. "a?", "")
	text = gsub(text, ".", symbol)

	-- Whitespace and punctuation clean-up.
	text = gsub(text, " ' ", "")
	text = gsub(text, " *· *·? *", " · ")
	text = gsub(text, " *%.", ".")
	text = gsub(text, "\n+", "\n\n")
	text = gsub(text, "\n\n$", "")

	-- If the text contains sentence-final punctuation, capitalise the start
	-- of the text, of each paragraph and of each sentence (skipping over a
	-- leading apostrophe).
	if match(text, "%. ") or match(text, "%.\n.") or match(text, "%.$") then
		text = gsub(text, "^'?.", upper)
		text = gsub(text, "\n\n'?.", upper)
		text = gsub(text, "%. '?.", upper)
	end

	-- East Bodish-specific fixes
	if east_bodish[lang] then
		if lang == "xkz" then
			-- Swap the marking of vowel-initial syllables: mark bare initial
			-- vowels with a temporary "q", drop existing apostrophes before
			-- vowels, then turn "q" into an apostrophe.
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			-- Replace one combining diacritic with another and reorder a
			-- two-diacritic sequence.
			text = gsub(text, "̄", "̂")
			text = gsub(text, "̂́", "́̂")
			text = gsub(text, "([bcdfghjklmnprstvwxyz]+[aeiou])́", "'%1")
			text = gsub(text, "([aeiouâêîôû]n?[bcdfghjklmnprstvwxyz])a", "%1")
		elseif lang == "dzl" then
			text = gsub(text, "g%.y", "'y")
			text = gsub(text, "sn", "'n")
			text = gsub(text, "dbr", "'r")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "'w%f[%A]", "'wa")
		elseif lang == "kjz" then
			text = gsub(text, "r%f[nm]", "'")
			text = gsub(text, "db%f[yr]", "'")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "ml", "'l")
			-- Same apostrophe swap as for "xkz".
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			text = gsub(text, "([aeiou])'", "%1h")
			text = gsub(text, "([aou])l%f[%A]", "%1̈")
		end
		-- b/d/g and "nk" directly before a non-letter (or the end of the text).
		text = gsub(text, "[bdg]%f[%A]", {b="p", d="t", g="k"})
		text = gsub(text, "nk%f[%A]", "ng")
		-- Remove all whitespace; spaces from the input are still held as "\1"
		-- here and are restored below.
		text = gsub(text, "%s", "")
	end

	text = gsub(text, "\1", " ") -- substitute normal space between words back

	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return toNFC(text) .. " "
end
return export