模組:Category tree/poscatboiler/data/lang-specific/jpx/sandbox

維基詞典,自由的多語言詞典


-- Table returned by this module; the functions defined on it below form its public interface.
local export = {}

-- NOTE(review): `Hira` is not referenced anywhere in the visible part of this file.
local Hira = require("Module:scripts").getByCode("Hira")
-- Used below for `ones_position` (number -> English word) and `spell_number`.
local m_numeric = require("Module:ConvertNumeric")
-- Unicode-aware pattern matching, used to parse category labels in the handlers.
local rmatch = mw.ustring.match
-- Maps the romanized reading-type codes accepted as category-page arguments
-- (e.g. "kun", "goon") to the Chinese reading-type names used in label and
-- category names of the form "有漢字使用<name>的".
local map_reading_types = {
	["goon"] = "吳音",
	["kan'on"] = "漢音",
	["kan'yōon"] = "慣用音",
	["kun"] = "訓讀",
	["nanori"] = "名乘",
	["on"] = "音讀",
	["tōon"] = "唐音",
	["sōon"] = "宋音",
}

--- Register the fixed (non-pattern) category labels for a Japonic language.
--
-- Each entry written into `labels` is a label specification table with fields
-- such as `description`, `parents`, `additional`, `intro`, `breadcrumb` and
-- `toc_template`. The `{{{langname}}}`, `{{{langcat}}}` and `{{{langcode}}}`
-- placeholders are left in the strings as-is; presumably the caller
-- substitutes them later (not visible in this file).
--
-- @param labels table that receives the label specifications (mutated in place)
-- @param lang   language object; only `lang:getCode()` is used here
function export.add_labels(labels, lang)
	labels["連体詞"] = {
		description = "{{{langname}}} {{mention|ja|連体詞||[[adnominal]], attributive|tr=れんたいし, rentaishi|sc=Jpan}}. Modifies nouns, doesn’t conjugate, and doesn’t [[predicate#Verb|predicate]].",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["平假名"] = {
		-- "an" rather than "a" for code "ryu" so the article agrees with that language's name.
		description = "The [[hiragana]] ([[平仮名]], [[ひらがな]]) form of " .. (lang:getCode() == "ryu" and "an" or "a") .. " {{{langname}}} word is a [[phonetic]] representation of that word. " ..
		"Wiktionary represents {{{langname}}}-language segments in three ways: in normal form (with [[kanji]], if appropriate), in [[hiragana]] " ..
		"form (this differs from kanji form only when the segment contains kanji), and in [[romaji]] form.",
		additional = (lang:getCode() == "ja" and "For more information, see [[Wiktionary:Japanese language]].\n\n" or "") .. "''See also'' [[:Category:{{{langname}}} katakana]]",
		toc_template = "categoryTOC-hiragana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:Hiragana script characters",
		}
	}

	labels["歷史平假名"] = {
		description = "{{{langname}}} historical [[hiragana]].",
		additional = "''See also'' [[:Category:{{{langname}}} historical katakana]].",
		toc_template = "categoryTOC-hiragana",
		parents = {
			"平假名",
			{name = "{{{langcat}}}", raw = true},
			"Category:Hiragana script characters",
		}
	}

	labels["片假名"] = {
		description = "{{{langname}}} words and terms with katakana forms, sorted by conventional katakana sequence. Katakana is used primarily for transliterations of foreign words, including old Chinese Hanzi not used in [[shinjitai]].",
		additional = "Entries in this category are made by {{{langname}}} POS templates, [[Template:{{{langcode}}}-noun]], etc.",
		toc_template = "categoryTOC-katakana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:Katakana script characters",
		}
	}

	labels["歷史片假名"] = {
		description = "{{{langname}}} historical [[katakana]].",
		additional = "''See also'' [[:Category:{{{langname}}} historical hiragana]].",
		toc_template = "categoryTOC-katakana",
		parents = {
			"片假名",
			{name = "{{{langcat}}}", raw = true},
			"Category:Katakana script characters",
		}
	}

	labels["terms spelled with mixed kana"] = {
		description = "{{{langname}}} terms which combine [[hiragana]] and [[katakana]] characters, potentially with [[kanji]] too.",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			-- These must name the kana labels as they are defined above
			-- ("平假名" / "片假名"); no "hiragana" / "katakana" label is
			-- defined in this function.
			"平假名",
			"片假名",
		},
	}

	labels["敬語"] = {
		intro = "{{wikipedia|Honorific speech in Japanese}}",
		description = "{{{langname}}} [[honorific]]s.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["謙讓語"] = {
		description = "{{{langname}}} humble terms, or {{ja-r|謙譲語|けんじょうご}}, which is a type of honorific speech in {{{langname}}} that lowers the speaker in relation to the listener.",
		parents = "敬語",
	}

	labels["尊敬語"] = {
		description = "{{{langname}}} respectful terms, or {{ja-r|尊敬語|そんけいご}}, which is a type of honorific speech in {{{langname}}} that elevates the listener in relation to the speaker.",
		parents = "敬語",
	}

	labels["kanji by reading"] = {
		description = "{{{langname}}} kanji categorized by reading.",
		parents = {{name = "Han characters", sort = "reading"}},
	}

	labels["terms by kanji readings"] = {
		description = "{{{langname}}} categories grouped with regard to the readings of the kanji with which they are spelled.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms by reading pattern"] = {
		description = "{{{langname}}} categories with terms grouped by their reading patterns.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- One "terms read with <pattern>" label per reading pattern; `link` is the
	-- wikitext used to describe the pattern.
	for reading_pattern, link in pairs {
		["熟字訓"] = "{{ja-r|熟字訓|じゅくじくん}}",
		["重箱讀"] = "{{ja-r|重%箱%読み|じゅう%ばこ%よみ}}, a reading pattern in {{{langname}}} compounds where kanji read with ''on'yomi'' are followed by kanji read with ''kun'yomi''",
		["訓讀"] = "{{ja-r|訓読み|くんよみ}}",
		["名乘"] = "{{ja-r|名乗り|なのり}}",
		["音讀"] = "{{ja-r|音%読み|おん%よみ}}",
		["湯桶讀"] = "{{ja-r|湯%桶%読み|ゆ%とう%よみ}}, a reading pattern in {{{langname}}} compounds where kanji read with ''kun'yomi'' are followed by kanji read with ''on'yomi''",
	} do
		labels["terms read with " .. reading_pattern] = {
			description = "{{{langname}}} terms exhibiting " .. link .. ".",
			breadcrumb = reading_pattern,
			parents = {{name = "terms by reading pattern", sort = reading_pattern}},
		}
	end

	labels["依漢字讀法分類的"] = {
		description = "{{{langname}}} categories with terms grouped with regard to the types of readings of the kanji with which " ..
		"they are spelled, broadly those of Chinese origin (''on'' readings) and those of {{{langname}}} origin (''kun'' readings).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- Extra text attached to the generic ''on'' reading category. The lines are
	-- deliberately NOT indented: a wikitext list item is only recognized when
	-- the "*" is the first character of the line.
	local on_continuation = [=[


Categories of terms with more specific types of ''on'' readings can be found in the following categories:
* [[:Category:有漢字使用吳音的{{{langname}}}詞]]
* [[:Category:有漢字使用慣用音的{{{langname}}}詞]]
* [[:Category:有漢字使用漢音的{{{langname}}}詞]]
* [[:Category:有漢字使用唐音的{{{langname}}}詞]]
]=]

	local on_desc = ", which is a type of {{ja-r|音%読み|おん%よみ}} or {{ja-r|音|おん}} reading"
	-- One "有漢字使用<type>的" label per reading type.
	for reading_type, reading_desc in pairs {
		["吳音"] = "a {{ja-r|呉%音|ご%おん}} reading" .. on_desc,
		["漢音"] = "a {{ja-r|漢%音|かん%おん}} reading" .. on_desc,
		["慣用音"] = "a {{ja-l|慣用音|かんようおん|kan'yōon}} reading" .. on_desc,
		["訓讀"] = "a {{ja-r|訓%読み|くん%よみ}} or {{ja-r|訓|くん}} reading",
		["名乘"] = "a {{ja-r|名%乗り|な%のり}} reading, which is a type of reading used for people and places",
		["音讀"] = "an {{ja-r|音%読み|おん%よみ}} or {{ja-r|音|おん}} reading",
		["唐音"] = "a {{ja-r|唐%音|とう%おん}} reading" .. on_desc,
		["宋音"] = "a {{ja-r|宋%音|そう%おん}} reading" .. on_desc,
	} do
		labels["有漢字使用" .. reading_type .. "的"] = {
			description = "{{{langname}}} categories with terms that are spelled with one or more kanji which exhibit " .. reading_desc .. ".",
			-- The keys of this loop are the Chinese names, so the generic
			-- ''on'' category is "音讀" (comparing against "on" never matched).
			additional = reading_type == "音讀" and on_continuation,
			breadcrumb = reading_type,
			-- Sort under the parent by this loop's own key; `reading_pattern`
			-- belongs to the earlier loop and is not in scope here.
			parents = {{name = "依漢字讀法分類的", sort = reading_type}},
		}
	end

	labels["terms spelled with ateji"] = {
		intro = "{{wikipedia|Ateji}}",
		description = "{{{langname}}} terms containing one or more [[Appendix:Japanese glossary#ateji|ateji]] ({{mention|ja|当て字}}), which are [[kanji]] used to represent sounds rather than meanings (though meaning may have some influence on which kanji are chosen).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms spelled with daiyōji"] = {
		description = "Japanese terms spelled using [[Appendix:Japanese glossary#daiyouji|daiyōji]], categorized using {{temp|ja-daiyouji}}.",
		parents = {"terms by etymology"},
	}

	labels["terms spelled with jukujikun"] = {
		description = "{{{langname}}} terms containing one or more [[Appendix:Japanese glossary#jukujikun|jukujikun]] ({{mention|ja|熟字訓}}), which are [[kanji]] used to represent meanings rather than sounds.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["words with multiple readings"] = {
		description = "{{{langname}}} words with multiple pronunciations (hence multiple [[kana]] spellings).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["kanji readings by number of morae"] = {
		description = "{{{langname}}} categories grouped with regard to the number of morae in their kanji readings.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}
end



--- Append the pattern-based category handlers for a Japonic language.
--
-- Each handler is a function taking `data` (the fields used here are
-- `data.label`, `data.args` and `data.lang`). A handler returns nil when the
-- label is not one it recognizes; otherwise it returns a label specification
-- table (`description`, `parents`, `breadcrumb`, optionally `displaytitle`).
-- The last two handlers additionally return `true` as a second value.
-- NOTE(review): presumably that flag tells the caller the handler consumed
-- `data.args` itself -- confirm against the poscatboiler core.
--
-- @param handlers              array to which the handler functions are appended
-- @param lang                  language object, passed to the script-tagging helper
-- @param m_lang                language utility module; uses `kana_to_romaji`,
--                              `jsort` and `count_morae`
-- @param m_lang_kanji_readings kanji-readings module; uses `accented_labels_to_labels`,
--                              `labels`, `get_script` and `plain_link`
function export.add_handlers(handlers, lang, m_lang, m_lang_kanji_readings)
	-- Invert ones_position (number -> word) so that English number words in
	-- category names can be mapped back to numbers.
	-- FIXME: Only works for 0 through 19.
	local word_to_number = {}
	for k, v in pairs(m_numeric.ones_position) do
		word_to_number[v] = k
	end

	-- Captures a kana reading; "-" is also accepted inside a reading.
	local kana_capture = "([-ぁ-ー𛀁𛀆]+)"

	-- Period qualifiers that may precede a reading type in a category name.
	local periods = {
		historical = true,
		ancient = true,
	}

	-- Validate an optional period plus a reading type.
	-- Returns (period_text, reading_type_link): period_text is e.g. "historical "
	-- or nil when there is no period; reading_type_link is wikitext. Returns nil
	-- for both when the period or the reading type is not recognized.
	local function get_period_text_and_reading_type_link(period, reading_type)
		period = period ~= "" and period or nil
		if period and not periods[period] then
			return nil
		end
		local period_text = period and period .. " " or nil

		-- Allow periods (historical or ancient) by themselves; they will parse as reading types.
		if not period and periods[reading_type] then
			return nil, reading_type
		end

		local reading_label = m_lang_kanji_readings.accented_labels_to_labels[reading_type]
		if not reading_label then
			return nil
		end
		reading_label = m_lang_kanji_readings.labels[reading_label]
		local reading_type_link = "[[" .. reading_label.entry .. "|" .. mw.ustring.lower(reading_label.text) .. "]]"
		return period_text, reading_type_link
	end

	-- Wrap `reading` in script-tagging markup for display.
	local function get_tagged_reading(reading)
		return require("Module:script utilities").tag_text(reading, lang, m_lang_kanji_readings.get_script(reading))
	end

	-- Build a link to `reading` with a romanization computed from the reading
	-- with its hyphens removed. A truthy `historical` passes { hist = true }
	-- to the romanizer.
	local function get_reading_link(reading, historical)
		-- The extra parentheses keep only gsub's first result (the string) and
		-- drop the substitution count.
		local unhyphenated = (string.gsub(reading, '%-', ''))
		return m_lang_kanji_readings.plain_link {
			term = reading, tr = m_lang.kana_to_romaji(unhyphenated, historical and { hist = true })
		}
	end

	-- Truthy for reading types that end in "on" preceded by at least one more
	-- character (e.g. "goon", "kan'on"), but not for plain "on".
	local function is_on_subtype(reading_type)
		return reading_type:find(".on$")
	end


	-- "terms written with <count> Han script character(s)"
	table.insert(handlers, function(data)
		local count, plural = data.label:match("^terms written with (.+) Han script character(s?)$")
		if count then
			-- Make sure 'one' goes with singular and other numbers with plural.
			if (count == "one") ~= (plural == "") then
				return nil
			end
			local num = word_to_number[count]
			if not num then
				return nil
			end
			return {
				description = "{{{langname}}} terms written with " .. count .. " Han script character" .. plural .. " (also known as [[kanji]]).",
				breadcrumb = num,
				parents = {{name = "character counts", sort = num}},
			}
		end
	end)


	-- "kanji readings with <count> mora(e)"
	table.insert(handlers, function(data)
		local count, plural = data.label:match("^kanji readings with (.+) mora(e?)$")
		if count then
			-- Make sure 'one' goes with singular and other numbers with plural.
			if (count == "one") ~= (plural == "") then
				return nil
			end
			local num = word_to_number[count]
			if not num then
				return nil
			end
			return {
				description = "{{{langname}}} kanji readings containing " .. count .. " mora" .. plural .. ".",
				breadcrumb = num,
				parents = {{name = "kanji readings by number of morae", sort = num}},
			}
		end
	end)


	-- "kanji with [<period> ]<reading type> reading <kana>"
	table.insert(handlers, function(data)
		local label_pref, period, reading_type, reading = rmatch(data.label, "^(kanji with ([a-z]-) ?([%a']+) reading )" .. kana_capture .. "$")
		-- `period` is "" (still truthy) when the label matched without a period.
		if period then
			local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
			if not reading_type_link then
				return nil
			end

			-- Compute parents.
			local parents = {
				{name = "kanji by " .. (period_text or "") .. reading_type .. " reading", sort = m_lang.jsort(reading)}
			}
			if is_on_subtype(reading_type) then
				table.insert(parents, {name = "kanji with " .. (period_text or "") .. "on reading " .. reading, sort = reading_type})
			elseif period_text then
				table.insert(parents, {name = "kanji with " .. period_text .. "reading " .. reading, sort = reading_type})
			end
			if not period_text then
				table.insert(parents, {name = "kanji read as " .. reading, sort = reading_type})
			end

			local tagged_reading = get_tagged_reading(reading)
			return {
				description = "{{{langname}}} [[kanji]] with the " .. (period_text or "") .. reading_type_link .. " reading " ..
					get_reading_link(reading, period_text) .. ".",
				displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
				breadcrumb = tagged_reading,
				parents = parents,
			}
		end
	end)


	-- "kanji by [<period> ]<reading type> reading"
	table.insert(handlers, function(data)
		local period, reading_type = rmatch(data.label, "^kanji by ([a-z]-) ?([%a']+) reading$")
		if period then
			local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
			if not reading_type_link then
				return nil
			end

			-- Compute parents.
			local parents = {
				is_on_subtype(reading_type) and {name = "kanji by " .. (period_text or "") .. "on reading", sort = reading_type} or
				period_text and {name = "kanji by " .. reading_type .. " reading", sort = period} or
				{name = "kanji by reading", sort = reading_type}
			}
			if period_text then
				table.insert(parents, {name = "kanji by " .. period_text .. "reading", sort = reading_type})
			end

			-- Compute description.
			local description = "{{{langname}}} [[kanji]] categorized by " .. (period_text or "") .. reading_type_link .. " reading."
			return {
				description = description,
				breadcrumb = (period_text or "") .. reading_type,
				parents = parents,
			}
		end
	end)


	-- "kanji read as <kana>"
	table.insert(handlers, function(data)
		local label_pref, reading = rmatch(data.label, "^(kanji read as )" .. kana_capture .. "$")
		if reading then
			local parents = {{name = "kanji by reading", sort = m_lang.jsort(reading)}}

			local tagged_reading = get_tagged_reading(reading)
			return {
				-- There is no period in this category name, so no `historical`
				-- argument is passed to the link builder.
				description = "{{{langname}}} [[kanji]] read as " .. get_reading_link(reading) .. ".",
				displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
				breadcrumb = tagged_reading,
				parents = parents,
			}
		end
	end)


	-- "讀作「<kana>」的"
	table.insert(handlers, function(data)
		local reading = rmatch(data.label, "^讀作「" .. kana_capture .. "」的$")
		if reading then
			-- Compute parents.
			local sort_key = m_lang.jsort(reading)
			local mora_count = m_lang.count_morae(reading)
			local mora_count_words = m_numeric.spell_number(tostring(mora_count))
			local parents = {
				{name = "terms by kanji readings", sort = sort_key},
				{name = "kanji readings with " .. mora_count_words .. " mora" .. (mora_count > 1 and "e" or ""), sort = sort_key},
				{name = "kanji read as " .. reading, sort = " "},
			}

			local tagged_reading = get_tagged_reading(reading)
			return {
				description = "{{{langname}}} terms that contain kanji that exhibit a reading of " .. get_reading_link(reading) ..
				" in those terms prior to any sound changes.",
				displaytitle = "讀作「" .. tagged_reading .. "」的{{{langname}}}詞",
				breadcrumb = tagged_reading,
				parents = parents,
			}
		end
	end)


	-- "寫作「<kanji>」讀作「<kana>」的"; the reading types come from the page arguments.
	table.insert(handlers, function(data)
		local kanji, reading = rmatch(data.label, "^寫作「(.)」讀作「" .. kana_capture .. "」的$")
		if not kanji then
			return nil
		end
		-- The reading types are read as a list starting at positional parameter 4.
		local params = {
			[4] = {list = true},
		}
		local args = require("Module:parameters").process(data.args, params)
		if #args[4] == 0 then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms spelled with KANJI read as READING\", at least one reading type (e.g. \"kun\" or \"on\") must be specified using 4=, 5=, 6=, etc.")
		end

		-- NOTE(review): jsort is assumed to be side-effect free, so it is computed once here.
		local sort_key = m_lang.jsort(reading)
		local parents = {
			{name = "寫作「" .. kanji.. "」的", sort = sort_key},
			-- FIXME, using the kanji directly as the sort key is what it did before but maybe we should call [[Module:zh-sortkey]]
			-- to get the radical/stroke sort key
			{name = "讀作「" .. reading .. "」的", sort = kanji},
		}

		-- Validate every reading type once, collecting both its display name and
		-- its parent category. An unknown code raises a descriptive error rather
		-- than failing later with "attempt to concatenate a nil value".
		local reading_type_names = {}
		for _, reading_type in ipairs(args[4]) do
			local reading_type_name = map_reading_types[reading_type]
			if not reading_type_name then
				error("Unrecognized reading type \"" .. tostring(reading_type) ..
					"\"; expected one of the reading types known to this module (e.g. \"kun\" or \"on\").")
			end
			table.insert(reading_type_names, reading_type_name)
			table.insert(parents, {name = "有漢字使用" .. reading_type_name .. "的", sort = sort_key})
		end
		local reading_types = mw.text.listToText(reading_type_names, "、", "或")

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "含有漢字「{{{{{langcode}}}-l|" .. kanji .. "}}」且採用" ..
				reading_types .. "讀作「" .. get_reading_link(reading) .. "」的{{{langname}}}詞語。",
			-- mw.text.encode turns the kanji inside the tagged text into an HTML entity.
			displaytitle = "寫作「" .. mw.text.encode(tagged_kanji, kanji) .. "」讀作「" .. tagged_reading .. "」的{{{langname}}}詞",
			breadcrumb = "讀作「" .. tagged_reading .. "」",
			parents = parents,
		}, true
	end)


	-- "terms with <kanji> replaced by daiyōji <kanji>"; requires a sort= argument.
	table.insert(handlers, function(data)
		local kanji, daiyoji = rmatch(data.label, "^terms with (.) replaced by daiyōji (.)$")
		if not kanji then
			return nil
		end
		local params = {
			["sort"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		if not args.sort then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms with KANJI replaced by daiyōji DAIYOJI\", the sort key must be specified using sort=")
		end

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_daiyoji = get_tagged_reading(daiyoji)
		return {
			description = "{{{langname}}} terms with {{{{{langcode}}}-l|" .. kanji .. "}} replaced by [[Appendix:Japanese glossary#daiyouji|daiyōji]] {{{{{langcode}}}-l|" .. daiyoji .. "}}.",
			displaytitle = "{{{langname}}} terms with " .. tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji,
			breadcrumb = tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji,
			parents = {{name = "terms spelled with daiyōji", sort = args.sort}},
		}, true
	end)
end


return export