Lompat ke isi

Modul:utilitas

Dari Wikikato

Dokumentasi untuk modul ini dapat dibuat di Modul:utilitas/doc

-- Module table: every public function in this file is attached to it, and it
-- is the value returned at the end of the file.
local U = {}

-- Script helper module. The only member used in this file is
-- Scripts.detect(text), whose result is looked up in the stripScripts set.
local Scripts = require("Modul:aksara")

-- Internal helper: render a category link as wikitext.
-- `name` is the category title without the "Kategori:" prefix.
-- `sortkey` is optional; nil, false and "" all produce a link with no
-- sortkey part.
local function makeCat(name, sortkey)
	if not sortkey or sortkey == "" then
		return ("[[Kategori:%s]]"):format(name)
	end
	return ("[[Kategori:%s|%s]]"):format(name, sortkey)
end

---------------------------------------------------------------------
-- Set of script codes for which U.sortkeyStrip removes combining
-- marks. Text whose detected script is not a key here is returned
-- unchanged by U.sortkeyStrip.
---------------------------------------------------------------------
local stripScripts = {}
for _, code in ipairs({ "Latn", "Arab", "Hebr", "Syrc", "Thaa" }) do
	stripScripts[code] = true
end

---------------------------------------------------------------------
-- Helper: pull one usable string out of a table or frame-like value.
--
-- Lookup order:
--   1. t.args[1], then t.args["text"]      (only if t.args is a table)
--   2. any other non-empty string in t.args (pairs() order is
--      unspecified, so which one wins is not deterministic)
--   3. t[1]
-- A value is usable only if it is a non-empty string.
-- Returns the string found, or nil when `t` is not a table or holds
-- nothing usable.
---------------------------------------------------------------------
local function extractStringFromTable(t)
	if type(t) ~= "table" then
		return nil
	end

	local function usable(value)
		return type(value) == "string" and value ~= ""
	end

	local args = t.args
	if type(args) == "table" then
		-- Preferred keys first, in priority order.
		for _, key in ipairs({ 1, "text" }) do
			if usable(args[key]) then
				return args[key]
			end
		end
		-- Otherwise settle for whichever usable value pairs() yields first.
		for _, value in pairs(args) do
			if usable(value) then
				return value
			end
		end
	end

	-- Plain array-like table: only index 1 is considered.
	if usable(t[1]) then
		return t[1]
	end

	return nil
end

---------------------------------------------------------------------
-- Code point ranges (inclusive { first, last } pairs) that
-- U.sortkeyStrip removes from its input.
-- NOTE(review): none of these ranges lies in the U+05xx or U+06xx
-- blocks, so marks encoded there are left in place even for the
-- whitelisted Hebr/Arab scripts -- confirm this is intended.
---------------------------------------------------------------------
local strippedMarkRanges = {
	{ 0x0300, 0x036F },
	{ 0x1AB0, 0x1AFF },
	{ 0x1DC0, 0x1DFF },
	{ 0x20D0, 0x20FF },
	{ 0xFE20, 0xFE2F },
}

-- True when code point `cp` falls inside one of strippedMarkRanges.
local function isStrippedMark(cp)
	for _, range in ipairs(strippedMarkRanges) do
		if cp >= range[1] and cp <= range[2] then
			return true
		end
	end
	return false
end

---------------------------------------------------------------------
-- U.sortkeyStrip(text): remove combining marks from a sortkey.
--
-- text: a string, or a table/frame from which a string is extracted
--       with extractStringFromTable ("" if nothing usable is found).
-- Returns:
--   * `text` itself when it is nil, false or "";
--   * `text` unchanged when Scripts.detect(text) is not a key of
--     stripScripts;
--   * otherwise the text with every code point in strippedMarkRanges
--     removed.
-- When mw.ustring.toNFD is available the text is decomposed (NFD)
-- first and recomposed (NFC) afterwards, so accents on precomposed
-- letters are removed as well. The fallback path only filters code
-- points and performs no normalisation.
---------------------------------------------------------------------
function U.sortkeyStrip(text)
	if type(text) == "table" then
		text = extractStringFromTable(text) or ""
	end

	if not text or text == "" then
		return text
	end

	-- Scripts outside the whitelist are passed through untouched.
	if not stripScripts[Scripts.detect(text)] then
		return text
	end

	if mw.ustring and mw.ustring.toNFD then
		local decomposed = mw.ustring.toNFD(text)

		-- Build one character class covering every range: "[a-bc-d...]".
		local classParts = { "[" }
		for _, range in ipairs(strippedMarkRanges) do
			classParts[#classParts + 1] =
				mw.ustring.char(range[1]) .. "-" .. mw.ustring.char(range[2])
		end
		classParts[#classParts + 1] = "]"

		-- Assign to a local so gsub's second result (the count) is dropped.
		local bare = mw.ustring.gsub(decomposed, table.concat(classParts), "")
		return mw.ustring.toNFC(bare)
	end

	-- Fallback: copy every code point that is not a stripped mark.
	local kept = {}
	for _, cp in ipairs({ mw.ustring.codepoint(text, 1, -1) }) do
		if not isStrippedMark(cp) then
			kept[#kept + 1] = mw.ustring.char(cp)
		end
	end
	return table.concat(kept)
end

---------------------------------------------------------------------
-- U.getSortkey(): sortkey derived from the current page title.
--
-- Reads subpageText (falling back to text) from the current title,
-- keeps the part after the last "/", trims surrounding whitespace and
-- passes the result through U.sortkeyStrip.
-- Returns "" when the title yields no text, the trimmed segment when
-- stripping produces nothing, and the stripped segment otherwise.
---------------------------------------------------------------------
function U.getSortkey()
	local title = mw.title.getCurrentTitle()
	local pageName = title.subpageText or title.text or ""
	if pageName == "" then
		return ""
	end

	-- Last "/"-separated segment, then trimmed; fall back to the full
	-- name if trimming leaves nothing.
	local segment = mw.ustring.match(pageName, "([^/]+)$") or pageName
	segment = mw.ustring.match(segment, "^%s*(.-)%s*$") or segment
	if segment == "" then
		segment = pageName
	end

	local key = U.sortkeyStrip(segment)
	if key and key ~= "" then
		return key
	end
	return segment
end

---------------------------------------------------------------------
-- U.cat(text, sortkey): category link without a language code.
--
-- text:    category name; nil or "" yields U.errorcat({ "templat" }).
-- sortkey: optional; passed through U.sortkeyStrip before use.
-- Returns the wikitext built by makeCat.
---------------------------------------------------------------------
function U.cat(text, sortkey)
	if text and text ~= "" then
		local key = U.sortkeyStrip(sortkey)
		return makeCat(text, key)
	end
	return U.errorcat({ "templat" })
end

---------------------------------------------------------------------
-- U.langcat(lang, text, sortkey): category link with a language code.
--
-- The category name is lang .. ":" .. text. If either `lang` or `text`
-- is nil or "", the result is U.errorcat({ "bahasa" }) instead.
-- sortkey: optional; passed through U.sortkeyStrip before use.
---------------------------------------------------------------------
function U.langcat(lang, text, sortkey)
	local hasLang = lang and lang ~= ""
	local hasText = text and text ~= ""
	if not (hasLang and hasText) then
		return U.errorcat({ "bahasa" })
	end

	local key = U.sortkeyStrip(sortkey)
	local category = lang .. ":" .. text
	return makeCat(category, key)
end

---------------------------------------------------------------------
-- U.catlink(cat): turn a category link into a plain page link.
--
-- cat: wikitext beginning with "[[" (e.g. the output of U.cat).
-- Returns `cat` with a leading "[[" replaced by "[[:", so the page is
-- linked rather than categorised. Text that does not start with "[["
-- is returned unchanged; nil or "" yields "".
-- Always returns exactly one value.
---------------------------------------------------------------------
function U.catlink(cat)
	if not cat or cat == "" then
		return ""
	end
	-- gsub returns (string, count). Assigning to a single local drops the
	-- count, which would otherwise leak out as a second return value and
	-- show up as a stray number wherever all results are concatenated.
	local link = cat:gsub("^%[%[", "[[:")
	return link
end

---------------------------------------------------------------------
-- U.errorcat(frame): error message plus tracking category.
--
-- frame: either a frame-like table with .args, or a plain table used
--        as the argument list itself.
--   [1] error type: "bahasa", "templat", "entri" or "rima"; any other
--       value selects the generic entry.
--   [2] optional suffix, appended to the message in parentheses and to
--       the category name after a space.
-- Returns the message wrapped in <strong class='error'> followed by
-- the category link when the current page's namespace number is 0 or
-- 114, and "" in every other namespace.
-- NOTE(review): 0 is presumably the main namespace and 114 a
-- wiki-specific content namespace -- confirm.
---------------------------------------------------------------------
function U.errorcat(frame)
	local args = frame.args or frame
	local typeKey = args[1] or ""
	local suffix = args[2] or ""

	-- Per error type: { message text, category name prefix }.
	local kinds = {
		bahasa  = { "Galat: Parameter kode bahasa tidak sah.", "Halaman dengan bahasa" },
		templat = { "Galat: Parameter templat tidak sah.", "Halaman dengan templat" },
		entri   = { "Galat: Parameter entri tidak sah.", "Halaman dengan entri" },
		rima    = { "Galat: Parameter rima tidak sah.", "Halaman dengan rima" },
		default = { "Galat: Parameter tidak sah.", "Halaman" },
	}

	local kind = kinds[typeKey] or kinds.default
	local msg, base = kind[1], kind[2]

	if suffix ~= "" then
		msg = msg .. " (" .. suffix .. ")"
		base = base .. " " .. suffix
	end

	local ns = mw.title.getCurrentTitle().namespace
	if ns ~= 0 and ns ~= 114 then
		return ""
	end

	return string.format(
		"<strong class='error'>%s</strong>[[Kategori:%s galat]]",
		U.safeEscape(msg),
		base
	)
end

---------------------------------------------------------------------
-- U.rhymecat(lang, rhymes, syllables, sortkey): rhyme categories.
--
-- lang:      language code handed to U.langcat.
-- rhymes:    array of rhyme strings (nil is treated as empty); entries
--            that are false or "" are skipped.
-- syllables: optional array parallel to `rhymes`; a non-empty entry at
--            the same index extends the category name with
--            "/<entry> suku kata".
-- sortkey:   optional; passed through U.sortkeyStrip once up front.
-- Returns one U.langcat result per kept rhyme, joined with newlines
-- ("" when nothing was kept).
---------------------------------------------------------------------
function U.rhymecat(lang, rhymes, syllables, sortkey)
	local key = U.sortkeyStrip(sortkey)
	local links = {}

	for index, rhyme in ipairs(rhymes or {}) do
		if rhyme and rhyme ~= "" then
			local name = "Rima/" .. rhyme
			local count = syllables and syllables[index]
			if count and count ~= "" then
				name = name .. "/" .. count .. " suku kata"
			end
			links[#links + 1] = U.langcat(lang, name, key)
		end
	end

	return table.concat(links, "\n")
end

---------------------------------------------------------------------
-- U.etymcat(lang1, type, lang2text, sortkey): etymology category.
--
-- lang1:     language code of the entry; nil or "" yields
--            U.errorcat({ "bahasa" }).
-- type:      etymology type text; nil or "" yields
--            U.errorcat({ "templat", "etimologi" }).
-- lang2text: optional source-language text; when non-empty the
--            category name becomes type .. " dari " .. lang2text.
-- sortkey:   optional; passed through U.sortkeyStrip.
-- Returns the result of U.langcat for the assembled category name.
-- Note: the parameter `type` shadows Lua's builtin type() inside this
-- function, so the builtin must not be called here.
---------------------------------------------------------------------
function U.etymcat(lang1, type, lang2text, sortkey)
	if not lang1 or lang1 == "" then
		return U.errorcat({ "bahasa" })
	end
	if not type or type == "" then
		return U.errorcat({ "templat", "etimologi" })
	end

	local key = U.sortkeyStrip(sortkey)

	local name = type
	if lang2text and lang2text ~= "" then
		name = type .. " dari " .. lang2text
	end
	return U.langcat(lang1, name, key)
end

---------------------------------------------------------------------
-- Wrappers for template usage
---------------------------------------------------------------------

-- Frame wrapper for U.cat: frame.args[1] is the category name and
-- frame.args[2] the sortkey.
function U.getCat(frame)
	local params = frame.args
	local name, sortkey = params[1], params[2]
	return U.cat(name, sortkey)
end

-- Frame wrapper for U.langcat: frame.args[1] is the language code,
-- frame.args[2] the category text and frame.args[3] the sortkey.
function U.getLangcat(frame)
	local params = frame.args
	local lang, text, sortkey = params[1], params[2], params[3]
	return U.langcat(lang, text, sortkey)
end

-- Frame wrapper for U.etymcat: frame.args[1..4] are passed on, in
-- order, as lang1, type, lang2text and sortkey.
function U.getEtymcat(frame)
	local params = frame.args
	local lang1, etymType = params[1], params[2]
	local lang2text, sortkey = params[3], params[4]
	return U.etymcat(lang1, etymType, lang2text, sortkey)
end

-- Frame wrapper for U.errorcat: the frame is handed over as-is, and
-- U.errorcat reads its arguments from frame.args.
function U.getErrorcat(frame)
	local output = U.errorcat(frame)
	return output
end

---------------------------------------------------------------------
-- U.safeEscape(text): escape text for safe inclusion in HTML output.
--
-- text: value to escape; nil and false yield "".
-- Returns the escaped text.
--
-- The Scribunto mw.text library does not document an `escape`
-- function, so a check for mw.text.escape alone never succeeds and
-- the text would be returned unescaped. mw.text.escape is still
-- preferred if an environment supplies it; otherwise mw.text.encode
-- is used, which replaces <, >, &, " and the non-breaking space with
-- HTML entities. If neither exists the text is returned as given.
---------------------------------------------------------------------
function U.safeEscape(text)
	if not text then
		return ""
	end

	local textLib = mw.text
	if textLib and textLib.escape then
		return textLib.escape(text)
	end
	if textLib and textLib.encode then
		-- tostring() lets numbers through; encode expects a string.
		return textLib.encode(tostring(text))
	end
	return text
end

-- Export the module table holding every U.* function defined above.
return U