モジュール:headword/data

出典: フリー多機能辞典『ウィクショナリー日本語版(Wiktionary)』
ナビゲーションに移動 検索に移動

このモジュールについての説明文ページを モジュール:headword/data/doc に作成できます

-- Module table returned at the end of the file. Every list assigned to it
-- before the conversion loop near the bottom is turned into a set by that loop.
local data = {}

-- Term types listed under the name "invariable".
-- NOTE(review): presumably these are headword types that never inflect
-- (Lojban word classes, CJK character/romanization types, phrasebook
-- entries) -- confirm against the module that reads this table.
data.invariable = {
	"cmavo",
	"cmene",
	"fu'ivla",
	"gismu",
	"Han tu",
	"hanzi",
	"hanja",
	"jyutping",
	"kanji",
	"lujvo",
	"phrasebook",
	"pinyin",
	"rafsi",
	"romaji",
}

-- Part-of-speech names classified as lemmas (the counterpart of
-- data.nonlemmas). English names come first, followed by the Japanese
-- names used on this wiki.
-- NOTE(review): presumably the headword module looks a part of speech up
-- here to decide whether an entry is categorised as a lemma -- confirm
-- against the consumer.
data.lemmas = {
	-- TODO: the English entries are not needed.
	"abbreviation",
	"acronym",
	"adjective",
	"adnominal",
	"adposition",
	"adverb",
	"affix",
	-- NOTE(review): "affixe" is not a part-of-speech name; it looks like
	-- "affixes" with only the trailing "s" removed. Kept next to the correct
	-- "affix" in case the consumer singularises by stripping a bare "s" --
	-- remove once that is confirmed not to be the case.
	"affixe",
	"ambiposition",
	"article",
	"circumfix",
	"circumposition",
	"classifier",
	"cmavo",
	"cmavo cluster",
	"cmene",
	"combining form",
	"conjunction",
	"counter",
	"determiner",
	"diacritical mark",
	"equative adjective", -- NOTE(review): also present in data.nonlemmas
	"fu'ivla",
	"gismu",
	"Han character",
	"Han tu",
	"hanzi",
	"hanja",
	"ideophone",
	"idiom",
	"infix",
	"interfix",
	"initialism",
	"interjection",
	"kanji",
	"letter",
	"ligature",
	"lujvo",
	"morpheme",
	"non-constituent",
	"noun",
	"number",
	"numeral",
	"numeral symbol",
	"particle",
	"phrase",
	"postposition",
	"postpositional phrase",
	"predicative",
	"prefix",
	"preposition",
	"prepositional phrase",
	"preverb",
	"pronominal adverb",
	"pronoun",
	"proverb", -- was misspelled "prover", which could never match "proverb(s)"
	"proper noun",
	"punctuation mark",
	"relative",
	"root",
	"stem",
	"suffix",
	"syllable",
	"symbol",
	"verb",
	"漢字", -- kanji / Han character
	"間投詞", -- interjection
	"感動詞", -- interjection
	"形容詞", -- adjective
	"形容動詞", -- adjectival noun (na-adjective)
	"固有名詞", -- proper noun
	"数詞", -- numeral
	"代名詞", -- pronoun
	"動詞", -- verb
	"副詞", -- adverb
	"分詞", -- participle
	"名詞", -- noun
}

-- Part-of-speech names classified as non-lemma forms (the counterpart of
-- data.lemmas). English names come first, followed by the Japanese names
-- used on this wiki.
-- NOTE(review): presumably the headword module looks a part of speech up
-- here to decide whether an entry is categorised as a non-lemma form --
-- confirm against the consumer.
data.nonlemmas = {
	-- TODO: the English entries are not needed.
	"active participle",
	"adjectival participle",
	"adjective form",
	"adjective feminine form",
	"adjective plural form",
	"adverb form",
	"adverbial participle",
	"agent participle",
	"article form",
	"circumfix form",
	"combined form",
	"comparative adjective form",
	"comparative adjective",
	"comparative adverb form",
	"comparative adverb",
	"contraction",
	"converb",
	"determiner comparative form",
	"determiner form",
	"determiner superlative form",
	"diminutive noun",
	"equative adjective form",
	-- NOTE(review): "equative adjective" is also listed in data.lemmas, so it
	-- is a member of both sets; which one wins depends on the consumer's
	-- lookup order -- confirm this is intended.
	"equative adjective",
	"future participle",
	"gerund",
	"infinitive form",
	"infinitive",
	"interjection form",
	"jyutping",
	"kanji reading",
	"misspelling",
	"name form",
	"negative participle",
	"nominal participle",
	"noun case form",
	"noun dual form",
	"noun form",
	"noun paucal form",
	"noun plural form",
	"noun possessive form",
	"noun singulative form",
	"numeral form",
	"participle",
	"participle form",
	"particle form",
	"passive participle",
	"past active participle",
	"past participle",
	"past participle form",
	"past passive participle",
	"perfect active participle",
	"perfect participle",
	"perfect passive participle",
	"pinyin",
	"plural",
	"postposition form",
	"prefix form",
	"preposition contraction",
	"preposition form",
	"prepositional pronoun",
	"present active participle",
	"present participle",
	"present passive participle",
	"pronoun form",
	"pronoun possessive form",
	"proper noun form",
	"proper noun plural form",
	"rafsi",
	"romanization",
	"root form",
	"singulative",
	"suffix form",
	"superlative adjective form",
	"superlative adjective",
	"superlative adverb form",
	"superlative adverb",
	"verb form",
	"verbal noun",
	"形容詞 定形", -- adjective, definite form
	"固有名詞 定形", -- proper noun, definite form
	"動詞 活用形", -- verb, conjugated form
	"動詞 定形", -- verb, finite form (NOTE(review): 定形 read as "finite" here -- confirm)
	"副詞 定形", -- adverb, definite form
	"分詞 定形", -- participle, definite form
	"名詞 定形", -- noun, definite form
	"名詞 複数形", -- noun, plural form
	"和語の漢字表記", -- kanji spelling of a native Japanese word
}

-- Language codes that will not have "LANG multiword terms" categories added.
-- The list is grouped by the reason the language is excluded; the
-- commented-out codes are candidates whose status was left undecided.
data.no_multiword_cat = {
	-------- Languages without spaces between words (sometimes spaces between phrases) --------
	"aho", -- Ahom
	"blt", -- Tai Dam
	"ja", -- Japanese
	"khb", -- Lü
	"km", -- Khmer
	"lo", -- Lao
	"mnw", -- Mon
	"my", -- Burmese
	"nan", -- Min Nan (some words in Latin script; hyphens between syllables)
	"nod", -- Northern Thai
	"ojp", -- Old Japanese
	"tdd", -- Tai Nüa
	"th", -- Thai
	"tts", -- Isan
	"twh", -- Tai Dón
	"shn", -- Shan
	"sou", -- Southern Thai
	"zh", -- Chinese (all varieties with Chinese characters)

	-------- Languages with spaces between syllables --------
	"ahk", -- Akha
	"aou", -- A'ou
	"atb", -- Zaiwa
	"byk", -- Biao
	--"duu", -- Drung; not sure
	--"hmx-pro", -- Proto-Hmong-Mien
	--"hnj", -- Green Hmong; not sure
	"huq", -- Tsat
	"ium", -- Iu Mien
	--"lis", -- Lisu; not sure
	"mtq", -- Muong
	--"mww", -- White Hmong; not sure
	--"sit-gkh", -- Gokhy; not sure
	--"swi", -- Sui; not sure
	"tbq-lol-pro", -- Proto-Loloish
	"tdh", -- Thulung
	"ukk", -- Muak Sa-aak
	"vi", -- Vietnamese
	"yig", -- Wusa Nasu
	"zng", -- Mang

	-------- Languages with ~ with surrounding spaces used to separate variants --------
	"mkh-ban-pro", -- Proto-Bahnaric
	-- NOTE(review): this entry used to be annotated "listed above", but no
	-- earlier section of this table contains "sit-pro"; this is its only entry.
	"sit-pro", -- Proto-Sino-Tibetan

	-------- Other weirdnesses --------
	"mul", -- Translingual; gestures, Morse code, etc.
	"aot", -- Atong (India); bullet is a letter

	-------- All sign languages --------
	"ads",
	"aed",
	"aen",
	"afg",
	"ase",
	"asf",
	"asp",
	"asq",
	"asw",
	"bfi",
	"bfk",
	"bog",
	"bqn",
	"bqy",
	"bvl",
	"bzs",
	"cds",
	"csc",
	"csd",
	"cse",
	"csf",
	"csg",
	"csl",
	"csn",
	"csq",
	"csr",
	"doq",
	"dse",
	"dsl",
	"ecs",
	"esl",
	"esn",
	"eso",
	"eth",
	"fcs",
	"fse",
	"fsl",
	"fss",
	"gds",
	"gse",
	"gsg",
	"gsm",
	"gss",
	"gus",
	"hab",
	"haf",
	"hds",
	"hks",
	"hos",
	"hps",
	"hsh",
	"hsl",
	"icl",
	"iks",
	"ils",
	"inl",
	"ins",
	"ise",
	"isg",
	"isr",
	"jcs",
	"jhs",
	"jls",
	"jos",
	"jsl",
	"jus",
	"kgi",
	"kvk",
	"lbs",
	"lls",
	"lsl",
	"lso",
	"lsp",
	"lst",
	"lsy",
	"lws",
	"mdl",
	"mfs",
	"mre",
	"msd",
	"msr",
	"mzc",
	"mzg",
	"mzy",
	"nbs",
	"ncs",
	"nsi",
	"nsl",
	"nsp",
	"nsr",
	"nzs",
	"okl",
	"pgz",
	"pks",
	"prl",
	"prz",
	"psc",
	"psd",
	"psg",
	"psl",
	"pso",
	"psp",
	"psr",
	"pys",
	"rms",
	"rsl",
	"rsm",
	"sdl",
	"sfb",
	"sfs",
	"sgg",
	"sgx",
	"slf",
	"sls",
	"sqk",
	"sqs",
	"ssp",
	"ssr",
	"svk",
	"swl",
	"syy",
	"tse",
	"tsm",
	"tsq",
	"tss",
	"tsy",
	"tza",
	"ugn",
	"ugy",
	"ukl",
	"uks",
	"vgt",
	"vsi",
	"vsl",
	"vsv",
	"xki",
	"xml",
	"xms",
	"ygs",
	"ysl",
	"zib",
	"zsl",
}

-- Language codes for which the hyphen is not considered a word separator
-- when deciding membership of the "multiword terms" category. Each entry's
-- trailing comment records why hyphens are not word boundaries there.
data.hyphen_not_multiword_sep = {
	"akk", -- Akkadian; hyphens between syllables
	"akl", -- Aklanon; hyphens for mid-word glottal stops
	"ber-pro", -- Proto-Berber; morphemes separated by hyphens
	"ceb", -- Cebuano; hyphens for mid-word glottal stops
	"cnk", -- Khumi Chin; hyphens used in single words
	"cpi", -- Chinese Pidgin English; Chinese-derived words with hyphens between syllables
	"de", -- German; too many false positives
	"esx-esk-pro", -- hyphen used to separate morphemes
	"fi", -- Finnish; hyphen used to separate components in compound words if the final and initial vowels match, respectively
	"hil", -- Hiligaynon; hyphens for mid-word glottal stops
	"ilo", -- Ilocano; hyphens for mid-word glottal stops
	"lcp", -- Western Lawa; dash as syllable joiner
	"lwl", -- Eastern Lawa; dash as syllable joiner
	"mkh-vie-pro", -- Proto-Vietic; morphemes separated by hyphens
	"msb", -- Masbatenyo; too many false positives
	"tl", -- Tagalog; too many false positives
	"war", -- Waray-Waray; too many false positives
}

-- Language codes that will not have "LANG masculine nouns" and similar
-- gender-based categories added.
data.no_gender_cat = {
	-- Languages without gender but which use the gender field for other purposes
	"ja", -- Japanese
	"th", -- Thai
}

-- Language codes listed under the name "notranslit".
-- NOTE(review): presumably the headword module skips automatic
-- transliteration for these languages -- confirm against the consumer.
-- The list is not strictly alphabetical ("yue" precedes "yoi" and "yox"),
-- which is harmless because the loop near the end of the file passes it to
-- list_to_set.
data.notranslit = {
	"ams",
	"az",
	"bbc",
	"bug",
	"cia",
	"cjm",
	"cmn",
	"hak",
	"ja",
	"kzg",
	"lad",
	"lzh",
	"ms",
	"mul",
	"mvi",
	"nan",
	"oj",
	"ojp",
	"okn",
	"pi",
	"ro",
	"ryn",
	"rys",
	"ryu",
	"sh",
	"tgt",
	"th",
	"tkn",
	"tly",
	"und",
	"vi",
	"xug",
	"yue",
	"yoi",
	"yox",
	"za",
	"zh",
}

-- Script codes for which a script-tagged display title will be added.
-- NOTE(review): "polytonic" is the only entry that is not a four-letter
-- code; presumably it is a wiki-specific script code -- confirm it still
-- exists in the script data.
data.toBeTagged = {
	"Ahom",
	"Arab",
	"Avst",
	"Bali",
	"Cham",
	"Copt",
	"Kali",
	"Hani",
	"Hebr",
	"Lana",
	"Linb",
	"Mand",
	"Mong",
	"polytonic",
	"Rjng",
	"Samr",
	"Sund",
	"Sylo",
	"Tang",
	"Tavt",
	"Xsux",
}

-- Replace every list stored in `data` so far with the result of
-- Module:utils.list_to_set (presumably a table keyed by the list's values,
-- allowing lookups such as data.lemmas["noun"] -- confirm in Module:utils).
-- Anything assigned to `data` after this point is left untouched.
do
	-- Resolve the helper once instead of on every iteration.
	local list_to_set = require("Module:utils").list_to_set
	-- Only existing keys are reassigned, which Lua permits during a
	-- pairs() traversal; no new keys are added inside the loop.
	for key, list in pairs(data) do
		data[key] = list_to_set(list)
	end
end

-- Parts of speech for which categories like "German masculine nouns" or "Russian imperfective verbs"
-- will be generated if the headword is of the appropriate gender/number. This is a map from a part
-- of speech to the part-of-speech name stored as its value, not a list, so it is defined after the
-- conversion loop above; otherwise that loop would pass it to list_to_set as well.
data.pos_for_gender_number_cat = {
	["名詞"] = "名詞", -- noun -> noun
	["固有名詞"] = "名詞", -- proper noun -> noun
	["接尾辞"] = "接尾辞", -- suffix -> suffix
	-- We include verbs because impf and pf are valid "genders".
	["動詞"] = "動詞", -- verb -> verb
}

-- Export the assembled data table as the module's value.
return data