コンテンツにスキップ

モジュール:Tibt-translit

出典: フリー多機能辞典『ウィクショナリー日本語版(Wiktionary)』

このモジュールについての説明文ページを モジュール:Tibt-translit/doc に作成できます

local m_str_utils = require("モジュール:string utilities")

local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local upper = m_str_utils.upper
local u = m_str_utils.char

local Tibt = require("モジュール:Tibt-common")

local export = {}

-- Multi-character sequences (a letter followed by a combining sign) and the
-- Latin string each one is transliterated to.
-- export.tr substitutes every key of this table in a syllable before it runs
-- the per-character lookup in oneChar, so each sequence is replaced as a unit.
-- The table is traversed with pairs(), so the substitution order is unspecified.
local twoChars = {
	["རྀ"] = "ṛ", ["ྲྀ"] = "ṛ", -- Primarily used in Sanskrit(-derived) borrowings.
	["ལྀ"] = "ḷ", ["ླྀ"] = "ḷ",
	
	["ཕ༹"] = "f", ["བ༹"] = "v", -- Used to transliterate Chinese.
	["ཁ༹"] = "x", ["ག༹"] = "ġ", -- Used in Balti.
	
	-- Same four sequences as above, keyed by what appear to be the subjoined forms of the letters.
	["ྥ༹"] = "f", ["ྦ༹"] = "v",
	["ྑ༹"] = "x", ["ྒ༹"] = "ġ",
}

-- Per-character transliteration table. export.tr passes it as the replacement
-- table to gsub(tr, ".", oneChar), so every single character of a syllable
-- that appears as a key here is replaced by its value; other characters are kept.
--
-- Some values are placeholders that export.tr resolves afterwards:
--   * a leading "." (as in ".w", ".y", ".r") is dropped or kept by the
--     pattern substitutions that follow the lookup in export.tr;
--   * "\1" (the value for ཨ) is removed at the start of a syllable and
--     turned into "." elsewhere.
local oneChar = {
	["ཀ"] = "k", ["ཁ"] = "kh", ["ག"] = "g", ["ང"] = "ng",
	["ཅ"] = "c", ["ཆ"] = "ch", ["ཇ"] = "j", ["ཉ"] = "ny",
	["ཏ"] = "t", ["ཐ"] = "th", ["ད"] = "d", ["ན"] = "n",
	["པ"] = "p", ["ཕ"] = "ph", ["བ"] = "b", ["མ"] = "m",
	["ཙ"] = "ts", ["ཚ"] = "tsh", ["ཛ"] = "dz", ["ཝ"] = ".w",
	["ཞ"] = "zh", ["ཟ"] = "z", ["འ"] = "'", ["ཡ"] = ".y",
	["ར"] = ".r", ["ཪ"] = ".r", ["ལ"] = "l", ["ཤ"] = "sh", ["ས"] = "s",
	["ཧ"] = "h", ["ཨ"] = "\1",
	["ཊ"] = "ṭ", ["ཋ"] = "ṭh", ["ཌ"] = "ḍ", ["ཎ"] = "ṇ", ["ཥ"] = "ṣ",
	["ཫ"] = "q", ["ཬ"] = "ṛ", -- Used in Balti.
	["྅"] = "ʼ", ["ྈ"] = "x", ["ྉ"] = "f", ["ྌ"] = "f", -- Used in Sanskrit.

	-- Use Unicode references for non-spacing characters to avoid making it unreadable when edited in a text file.
	-- U+0F71 becomes a combining macron (U+0304) rather than a letter; the remaining entries in this group yield vowel letters.
	[u(0x0F71)] = u(0x0304), [u(0x0F72)] = "i", [u(0x0F80)] = "ị", [u(0x0F74)] = "u", [u(0x0F7A)] = "e",
	[u(0x0F7B)] = "ai", [u(0x0F7C)] = "o", [u(0x0F7D)] = "au",
	
	[u(0x0F7E)] = "ṃ", [u(0x0F82)] = "ṃ", [u(0x0F83)] = "m̐", ["ཿ"] = "ḥ",
	
	-- Code points U+0F8D..U+0FBC: same consonant values as the letters at the top of the table, but without the "." prefix.
	[u(0x0F90)] = "k", [u(0x0F91)] = "kh", [u(0x0F92)] = "g", [u(0x0F94)] = "ng",
	[u(0x0F95)] = "c", [u(0x0F96)] = "ch", [u(0x0F97)] = "j", [u(0x0F99)] = "ny",
	[u(0x0F9F)] = "t", [u(0x0FA0)] = "th", [u(0x0FA1)] = "d", [u(0x0FA3)] = "n",
	[u(0x0FA4)] = "p", [u(0x0FA5)] = "ph", [u(0x0FA6)] = "b", [u(0x0FA8)] = "m",
	[u(0x0FA9)] = "ts", [u(0x0FAA)] = "tsh", [u(0x0FAB)] = "dz", [u(0x0FAD)] = "w", [u(0x0FBA)] = "w",
	[u(0x0FAE)] = "zh", [u(0x0FAF)] = "z", [u(0x0FB0)] = "'", [u(0x0FB1)] = "y", [u(0x0FBB)] = "y",
	[u(0x0FB2)] = "r", [u(0x0FBC)] = "r", [u(0x0FB3)] = "l", [u(0x0FB4)] = "sh", [u(0x0FB6)] = "s",
	[u(0x0FB7)] = "h", [u(0x0FB8)] = "+a",
	[u(0x0F9A)] = "ṭ", [u(0x0F9B)] = "ṭh", [u(0x0F9C)] = "ḍ", [u(0x0F9E)] = "ṇ", [u(0x0FB5)] = "ṣ",
	[u(0x0F8D)] = "x", [u(0x0F8E)] = "f", [u(0x0F8F)] = "f",
}

-- Digits, half-digits and punctuation, with the Latin text each is replaced by.
-- export.tr applies this table to the whole text with gsub(text, ".", symbol)
-- after all syllables have been transliterated.
local symbol = {
	["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4",
	["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9",
	["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5",
	["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5",
	["་"] = " ", ["༌"] = "*", ["།"] = ".", ["༎"] = ". ¶", ["༏"] = ";",
	["༑"] = "|", ["༈"] = "!", ["༔"] = ":", ["༼"] = "(", ["༽"] = ")",
	["༺"] = "<", ["༻"] = ">",
	-- An ordinary space in the input becomes the placeholder "\1" so it can be told apart
	-- from the spaces generated above; export.tr turns "\1" back into a space at the very end.
	[" "] = "\1"
}

-- Language codes that receive the East Bodish post-processing in export.tr.
-- Only "xkz", "dzl" and "kjz" have rules of their own; the other codes get
-- just the shared rules applied at the end of that section.
local east_bodish = { ["xkz"] = true, ["kjz"] = true, ["xkf"] = true, ["dzl"] = true, ["dka"] = true }

--- Transliterates Tibetan-script text into Latin script.
-- @param text  the string to transliterate
-- @param lang  language code (required); passed to Tibt.findMainStack and
--              used to select the East Bodish rules
-- @param sc    script code; accepted for interface compatibility, not used
-- @return the transliteration, NFC-normalized, followed by one trailing space
-- Raises an error when `lang` is nil or false.
function export.tr(text, lang, sc)
	-- The language and script objects were previously looked up here but never
	-- used; only the presence of the code is required.
	if not lang then
		error("言語コードが必要です")
	end
	
	text = gsub(text, "༒", "།")
	-- Drop trailing tsheg-like separators so they do not become a trailing " " or "*".
	text = gsub(text, "[་༌]+$", "")
	
	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			local tr = syllable
			
			-- Move the lengthening sign after the signs that follow it, so that
			-- the macron it turns into lands after the vowel letter.
			tr = gsub(tr, "(ཱ)([ིེུ-ཽྀྲླ]+)", "%2%1")
			
			-- If the main stack does not end in one of the signs excluded by the
			-- pattern below (optionally followed by the lengthening sign), insert
			-- an "a" after it, keeping the lengthening sign after that "a".
			local mainStack = gsub(Tibt.findMainStack(syllable, lang), "[ཾཿྂྃ]", "")
			if match(mainStack, "([^ༀི-ཽྀ]ཱ?)$") then
				local newMainStack = mainStack .. "a"
				newMainStack = gsub(newMainStack, "ཱa$", "aཱ")
				tr = gsub(tr, mainStack, newMainStack, 1)
			end
			
			-- Emphasis marks: remove the mark and wrap the whole syllable in markup.
			tr = gsub(tr, "^(.*)༷(.*)$", "<u>%1%2</u>")
			tr = gsub(tr, "^(.*)༵(.*)$", "<span style=\"text-decoration-style:double;\">%1%2</span>")
			
			-- Multi-character sequences first, then one character at a time.
			for letter, replacement in pairs(twoChars) do
				tr = gsub(tr, letter, replacement)
			end
			tr = gsub(tr, ".", oneChar)
			
			-- Insert "a" between an apostrophe and a following non-vowel.
			tr = gsub(tr, "(.')([^aāeiīoḷḹṛṝuū%-<])", "%1a%2")
			-- Resolve the "." prefix that oneChar puts on r/w/y: drop it at the
			-- start of the syllable and after a vowel, "-" or ">" ...
			tr = gsub(tr, "%f[^%zaāeiīoḷḹṛṝuū%->]%.", "")
			-- ... and also when the r/w/y is not itself followed by a vowel.
			tr = gsub(tr, "%.([rwy][^aāeiīoḷḹṛṝuū])", "%1")
			-- "\1" is the oneChar placeholder for ཨ: silent at the start of the
			-- syllable, written "." elsewhere.
			tr = gsub(tr, "^\1", "")
			tr = tr:gsub("\1", "%.")
			
			-- Replace the first remaining occurrence of the syllable in the text.
			text = gsub(text, syllable, tr, 1)
		end
	end
	
	-- U+0F84 and any "a" directly after it are removed.
	text = gsub(text, u(0x0F84) .. "a?", "")
	text = gsub(text, ".", symbol)
	text = gsub(text, " ' ", "")
	text = gsub(text, " *· *·? *", " · ")
	text = gsub(text, " *%.", ".")
	text = gsub(text, "\n+", "\n\n")
	text = gsub(text, "\n\n$", "")
	-- Only when the text contains a sentence-ending period: capitalise the first
	-- letter of the text, of each paragraph and of each sentence.
	if match(text, "%. ") or match(text, "%.\n.") or match(text, "%.$") then
		text = gsub(text, "^'?.", upper)
		text = gsub(text, "\n\n'?.", upper)
		text = gsub(text, "%. '?.", upper)
	end
	
	-- East Bodish-specific fixes
	if east_bodish[lang] then
		if lang == "xkz" then
			-- "q" is a temporary marker for a vowel at the start of a syllable,
			-- so the apostrophe removal in between does not affect it.
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			-- Combining macron becomes combining circumflex; then reorder it
			-- relative to a combining acute accent.
			text = gsub(text, "̄", "̂")
			text = gsub(text, "̂́", "́̂")
			text = gsub(text, "([bcdfghjklmnprstvwxyz]+[aeiou])́", "'%1")
			text = gsub(text, "([aeiouâêîôû]n?[bcdfghjklmnprstvwxyz])a", "%1")
		elseif lang == "dzl" then
			text = gsub(text, "g%.y", "'y")
			text = gsub(text, "sn", "'n")
			text = gsub(text, "dbr", "'r")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "'w%f[%A]", "'wa")
		elseif lang == "kjz" then
			text = gsub(text, "r%f[nm]", "'")
			text = gsub(text, "db%f[yr]", "'")
			text = gsub(text, "dba%'", "'w")
			text = gsub(text, "ml", "'l")
			-- Same temporary "q" marker technique as in the "xkz" branch.
			text = gsub(text, "%f[^%c%s]%f[aeiouāēīōū]", "q")
			text = gsub(text, "%'%f[aeiouāēīōū]", "")
			text = gsub(text, "q", "'")
			text = gsub(text, "([aeiou])'", "%1h")
			text = gsub(text, "([aou])l%f[%A]", "%1̈")
		end
		
		-- Shared rules: b/d/g before a non-letter become p/t/k, a resulting
		-- "nk" becomes "ng", and all whitespace is removed. The "\1" word-space
		-- placeholder is not whitespace and survives.
		text = gsub(text, "[bdg]%f[%A]", {b="p", d="t", g="k"})
		text = gsub(text, "nk%f[%A]", "ng")
		text = gsub(text, "%s", "")
	end
	
	text = gsub(text, "\1", " ") -- substitute normal space between words back
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return toNFC(text) .. " "
end

return export