-- Modul:ziman/dane2
-- Belgekirina modulê [nîşan bide] [biguhêre] [rojane bike]
-- Ev belge ji Modul:ziman/dane2/belge hatiye girtin.
-- Shorthand: build a string from one or more Unicode code points.
local u = mw.ustring.char
-- UTF-8 encoded strings for some commonly-used diacritics
-- (all of them are combining marks from U+0300..U+036F, so they attach to
-- the preceding base character and are easy to miss as raw literals).
local GRAVE = u(0x0300)
local ACUTE = u(0x0301)
local CIRC = u(0x0302)
local TILDE = u(0x0303)
local MACRON = u(0x0304)
local BREVE = u(0x0306)
local DOTABOVE = u(0x0307)
local DIAER = u(0x0308)
local CARON = u(0x030C)
local DGRAVE = u(0x030F)
local INVBREVE = u(0x0311)
local DOTBELOW = u(0x0323)
local RINGBELOW = u(0x0325)
local CEDILLA = u(0x0327)
local OGONEK = u(0x0328)
local DOUBLEINVBREVE = u(0x0361)
-- Punctuation to be used for standardChars field
-- (concatenated below with range fragments such as "A-Za-z0-9").
local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()'
-- Script lists shared by several records below, so the same table is
-- reused instead of building a new one per language.
local Cyrl = {"Cyrl"}
local Latn = {"Latn"}
local LatnArab = {"Latn", "Arab"}
-- Language records, keyed by language code.
-- Record layout as used throughout this file:
--   [1] language name, [2] an identifier of the form "Q<number>"
--   (presumably a Wikidata item -- TODO confirm), [3] a family code
--   (presumably resolved by a separate family data module -- TODO confirm),
--   followed by named fields (scripts, ancestors, sort_key, entry_name, ...).
-- In sort_key / entry_name, `from` and `to` are parallel lists; a `from`
-- pattern without a `to` counterpart is removed (see e.g. the comment on m["ar"]).
local m = {}
m["aa"] = {
"afarî",
"Q27811",
"cus",
aliases = {"qafarî"},
scripts = Latn,
entry_name = { remove_diacritics = ACUTE},
}
m["ab"] = {
"abxazî",
"Q5111",
"cau-abz",
aliases = {"Abxazayî"},
scripts = {"Cyrl", "Geor", "Latn"},
translit_module = "ab-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ae"] = {
"avestayî",
"Q29572",
"ira-cen",
scripts = {"Avst", "Gujr"},
translit_module = "Avst-translit",
wikipedia_article = "Avestan",
}
m["af"] = {
"afrîkansî",
"Q14196",
"gmw",
scripts = LatnArab,
ancestors = {"nl"},
sort_key = {
from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
}
m["ak"] = {
"akanî",
"Q28026",
"alv-ctn",
scripts = Latn,
}
m["am"] = {
"amharî",
"Q28244",
"sem-eth",
scripts = {"Ethi"},
translit_module = "Ethi-translit",
}
m["an"] = {
"aragonî",
"Q8765",
"roa-ibe",
scripts = Latn,
ancestors = {"roa-oan"},
}
m["ar"] = {
"erebî",
"Q13955",
"sem-arb",
-- FIXME, some of the following are varieties but it's not clear which ones
otherNames = {"erebiya standard ya nûjen", "erebiya standard", "erebiya klasîk"},
scripts = {"Arab", "Brai"},
-- replace alif waṣl with alif
-- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha,
-- damma, kasra, shadda, sukun, superscript (dagger) alef
entry_name = {
from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)},
to = {u(0x0627)}},
translit_module = "ar-translit",
standardChars = "ء-غف-ْٰٱ" .. PUNCTUATION .. "٠-٩،؛؟٫٬ـ",
}
m["as"] = {
"asamî",
"Q29401",
"inc-eas",
scripts = {"as-Beng"},
ancestors = {"inc-mas"},
translit_module = "as-translit",
}
m["av"] = {
"avarî",
"Q29561",
"cau-nec",
scripts = Cyrl,
ancestors = {"oav"},
translit_module = "av-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ay"] = {
"aymarayî",
"Q4627",
"sai-aym",
varieties = {"aymarayiya başûrî", "aymarayiya navendî"},
scripts = Latn,
}
m["az"] = {
"azerî",
"Q9292",
"trk-ogz",
aliases = {"azerbaycanî", "azêrbaycanî", "tirkiya azerî", "tirkiya azerbaycanî"},
varieties = {"azeriya bakurî", "azeriya başûrî",
{"afşarî", "azeriya afşarî"},
{"Qaşqayî", "Qaşqay"},
"Sonqor"
},
scripts = {"Latn", "Cyrl", "fa-Arab"},
ancestors = {"trk-oat"},
}
m["ba"] = {
"başkîrî",
"Q13389",
"trk-kbu",
scripts = Cyrl,
translit_module = "ba-translit",
override_translit = true,
}
m["be"] = {
"belarusî",
"Q9091",
"zle",
scripts = Cyrl,
ancestors = {"orv"},
translit_module = "be-translit",
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}},
entry_name = {
from = {"Ѐ", "ѐ", GRAVE, ACUTE},
to = {"Е", "е"}},
}
-- Bulgarian.
m["bg"] = {
    "bulgarî",
    "Q7918",
    "zls",
    -- Use the shared Cyrillic script list, like the other Cyrillic-only
    -- records in this file, instead of building a private {"Cyrl"} table.
    scripts = Cyrl,
    ancestors = {"cu"},
    translit_module = "bg-translit",
    -- Page titles: map the precomposed accented letters to their plain
    -- counterparts; the trailing combining grave/acute have no `to`
    -- counterpart and are therefore removed.
    entry_name = {
        from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
        to = {"Е", "е", "И", "и"},
    },
}
m["bh"] = {
"biharî",
"Q135305",
"inc-eas",
scripts = {"Deva"},
ancestors = {"inc-mgd"},
}
m["bi"] = {
"bîslamayî",
"Q35452",
"crp",
scripts = Latn,
ancestors = {"en"},
}
m["bm"] = {
"bambarayî",
"Q33243",
"dmn-emn",
aliases = {"bamanankanî"},
scripts = Latn,
}
m["bn"] = {
"bengalî",
"Q9610",
"inc-eas",
aliases = {"banglayî"},
scripts = {"Beng", "Newa"},
ancestors = {"inc-mbn"},
translit_module = "bn-translit",
}
m["bo"] = {
"tîbetî",
"Q34271",
"sit-tib",
scripts = {"Tibt"}, -- sometimes Deva?
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["br"] = {
"bretonî",
"Q12107",
"cel-bry",
scripts = Latn,
ancestors = {"xbm"},
}
m["ca"] = {
"katalanî",
"Q7026",
"roa",
-- don't list varieties here that are in [[Module:etymology languages/data]]
scripts = Latn,
ancestors = {"roa-oca"},
sort_key = {
from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"},
to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} ,
}
m["ce"] = {
"çeçenî",
"Q33350",
"cau-vay",
scripts = Cyrl,
translit_module = "ce-translit",
override_translit = true,
entry_name = {
from = {MACRON},
to = {}},
}
m["ch"] = {
"çamoroyî",
"Q33262",
"poz-sus",
aliases = {"çamoruyî"},
scripts = Latn,
}
m["co"] = {
"korsîkayî",
"Q33111",
"roa-itd",
aliases = {"korsuyî"},
scripts = Latn,
}
m["cr"] = {
"krî",
"Q33390",
"alg",
scripts = {"Cans", "Latn"},
translit_module = "translit-redirect",
}
m["cs"] = {
"çekî",
"Q9056",
"zlw",
scripts = Latn,
ancestors = {"zlw-ocs"},
sort_key = {
from = {"á", "é", "í", "ó", "[úů]", "ý"},
to = {"a", "e", "i", "o", "u" , "y"}} ,
}
m["cu"] = {
"slaviya kevn",
"Q35499",
"zls",
scripts = {"Cyrs", "Glag"},
translit_module = "Cyrs-Glag-translit",
entry_name = {
from = {u(0x0484)}, -- kamora
to = {}},
sort_key = {
from = {"оу", "є"},
to = {"у" , "е"}} ,
}
m["cv"] = {
"çuvaşî",
"Q33348",
"trk-ogr",
scripts = Cyrl,
translit_module = "cv-translit",
override_translit = true,
}
m["cy"] = {
"weylsî",
"Q9309",
"cel-bry",
scripts = Latn,
aliases = {"galî"},
ancestors = {"wlm"},
sort_key = {
from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"},
to = {"a" , "c~", "d~", "e" , "f~", "g~h", "i" , "l~", "o" , "p~", "r~", "t~", "u" , "w" , "y" }} ,
standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION,
}
m["da"] = {
"danmarkî",
"Q9035",
"gmq",
scripts = Latn,
ancestors = {"gmq-oda"},
}
m["de"] = {
"almanî",
"Q188",
"gmw",
varieties = {"almaniya alsatî", "almaniya emerîkî",
"almaniya baveryayî", "almaniya belçikayî",
"almaniya navendî",
"almaniya Liechtensteinî", "almaniya luksemburgî",
"almaniya namibî", "almaniya bakurî",
"almaniya prusyayî"},
scripts = {"Latn", "Latf"},
ancestors = {"gmh"},
sort_key = {
from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" },
to = {"a" , "e" , "i" , "o" , "u" , "ss"}} ,
standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION,
}
m["dv"] = {
"divehî",
"Q32656",
"inc-ins",
aliases = {"maldivyayî"},
scripts = {"Thaa"},
ancestors = {"elu-prk"},
translit_module = "dv-translit",
override_translit = true,
}
m["dz"] = {
"conxayî",
"Q33081",
"sit-tib",
scripts = {"Tibt"},
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["ee"] = {
"eweyî",
"Q30005",
"alv-gbe",
scripts = Latn,
}
m["el"] = {
"yûnanî",
"Q9129",
"grk",
aliases = {"yûnaniya nûjen", "neo-helenî"},
scripts = {"Grek", "Brai"},
ancestors = {"grc"},
translit_module = "el-translit",
override_translit = true,
sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd
from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"},
to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} ,
standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION .. "ἀ-῾",
}
m["en"] = {
"îngilîzî",
"Q1860",
"gmw",
aliases = {"îngilîziya nûjen", "îngilîziya nû"},
varieties = {{"îngilîziya hawaiyî"}, "polarî", "yîngilîzî"},
scripts = {"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion
ancestors = {"enm"},
sort_key = {
from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "ae", "oe", "c" , "n"}},
wikimedia_codes = {"en", "simple"},
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["eo"] = {
"esperantoyî",
"Q143",
"art",
scripts = Latn,
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"},
to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} ,
}
m["es"] = {
"spanî",
"Q1321",
"roa-ibe",
aliases = {"kastîlî"},
varieties = {"spaniya amazonî"},
scripts = {"Latn", "Brai"},
ancestors = {"osp"},
sort_key = {
from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"},
to = {"a", "e", "i", "o", "u" , "c", "n"}},
standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION,
}
-- Estonian.
m["et"] = {
    "estonî",
    "Q9072",
    -- Same family code as m["fi"]; the "urj-" prefix is also what m["hu"]
    -- ("urj-ugr") uses, whereas the previous "fiu-fin" matched no other
    -- record in this part of the file.
    -- NOTE(review): confirm that the family data module defines "urj-fin".
    "urj-fin",
    scripts = Latn,
}
m["eu"] = {
"baskî",
"Q8752",
"euq",
aliases = {"euskara"},
scripts = Latn,
}
m["fa"] = {
"farisî",
"Q9168",
"ira-swi",
aliases = {"persî", "ecemî", "farisiya nû", "farisiya nûjen"},
varieties = {{"farisiya rojavayî", "farisiya îranî"}, {"farisiya rojhilatî", "darî"}, {"aymaqî"}},
scripts = {"fa-Arab"},
ancestors = {"pal"}, -- "ira-mid"
entry_name = {
from = {u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652)},
to = {}} ,
}
m["ff"] = {
"fulahî",
"Q33454",
"alv-fwo",
aliases = {"fula"},
scripts = {"Latn", "Adlm"},
}
-- Finnish.
m["fi"] = {
"fînî",
"Q1412",
"urj-fin",
aliases = {"suomî"},
scripts = Latn,
entry_name = {
from = {"ˣ"}, -- Used to indicate gemination of the next consonant
to = {}},
-- Sort key: fold foreign accented letters onto their base letter; the last
-- pattern ("[':]") has no `to` counterpart and is therefore removed.
-- NOTE(review): "õ" appears both in "[óòôõ]" (-> "o") and in "[øõő]" (-> "ö").
-- If the patterns are applied in list order, the second occurrence can never
-- match -- confirm which mapping is intended for "õ".
sort_key = {
from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ" , "œ" , "[čç]", "š", "ž", "ß" , "[':]"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "ö" , "ae", "oe", "c" , "s", "z", "ss"}} ,
}
m["fj"] = {
"fîjî",
"Q33295",
"poz-occ",
scripts = Latn,
}
m["fo"] = {
"ferî",
"Q25258",
"gmq",
aliases = {"faroyî"},
scripts = Latn,
ancestors = {"non"},
}
m["fr"] = {
"fransî",
"Q150",
"roa-oil",
aliases = {"fransiya nûjen"},
varieties = {"fransiya afrîkî", "fransiya cezaîrî", "fransiya atlantîkî ya kanadayî",
"fransiya belçikayî",
"fransiya kongolî",
"fransiya haîtiyî",
"fransiya luksemburgî",
"fransiya fasî",
"fransiya kebekî",
"fransiya tunusî",
"fransiya afrîkayîya rojavayî"},
scripts = {"Latn", "Brai"},
ancestors = {"frm"},
sort_key = {
from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}},
standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION,
}
m["fy"] = {
"frîsî",
"Q27175",
"gmw-fri",
aliases = {"frîsiya rojavayî"},
scripts = Latn,
ancestors = {"ofs"},
sort_key = {
from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"},
to = {"a" , "e" , "i" , "o" , "u", "ae"}} ,
standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION,
}
m["ga"] = {
"îrlendî",
"Q9142",
"cel-gae",
aliases = {"îrlendiya gaelîkî", "gaelîkî", "îrî"}, -- calling it simply "Gaelic" is rare in Ireland, but relatively common in the Irish diaspora
scripts = Latn,
ancestors = {"mga"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" },
to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
}
m["gd"] = {
"gaelîka skotî",
"Q9314",
"cel-gae",
aliases = {"gaelîkî", "skotî"},
scripts = Latn,
ancestors = {"mga"},
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION,
}
m["gl"] = {
"galîsî",
"Q9307",
"roa-ibe",
scripts = Latn,
ancestors = {"roa-opt"},
sort_key = {
from = {"á", "é", "í", "ó", "ú"},
to = {"a", "e", "i", "o", "u"}} ,
}
m["gn"] = {
"guwaranî",
"Q35876",
"tup-gua",
scripts = Latn,
}
m["gu"] = {
"gujaratî",
"Q5137",
"inc-wes",
scripts = {"Gujr"},
ancestors = {"inc-mgu"},
translit_module = "gu-translit",
}
m["gv"] = {
"manksî",
"Q12175",
"cel-gae",
aliases = {"gaelîkiya manksî"},
varieties = {"manksiya bakurî", "manksiya başûrî"},
scripts = Latn,
ancestors = {"mga"},
sort_key = {
from = {"ç", "-"},
to = {"c"}} ,
standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION,
}
m["ha"] = {
"hawsayî",
"Q56475",
"cdc-wst",
scripts = LatnArab,
sort_key = {
from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" },
to = {"b~" , "d~" , "k~", "y~", "y~", "" }},
entry_name = {
from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE},
to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}},
}
m["he"] = {
"îbranî",
"Q9288",
"sem-can",
scripts = {"Hebr", "Phnx", "Brai"},
entry_name = {
from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. "]"},
to = {}} ,
}
m["hi"] = {
"hindî",
"Q1568",
"inc-hnd",
scripts = {"Deva", "Kthi", "Newa"},
ancestors = {"inc-ohi"},
translit_module = "hi-translit",
standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. PUNCTUATION,
}
m["ho"] = {
"hirî motuyî",
"Q33617",
"crp",
scripts = Latn,
ancestors = {"meu"},
}
m["ht"] = {
"haîtî",
"Q33491",
"crp",
aliases = {"kreyolî"},
scripts = Latn,
ancestors = {"fr"},
}
m["hu"] = {
"mecarî",
"Q9067",
"urj-ugr",
scripts = {"Latn", "Hung"},
ancestors = {"ohu"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"},
to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"},
},
}
m["hy"] = {
"ermenî",
"Q8785",
"hyx",
aliases = {"ermeniya nûjen"},
varieties = {"ermeniya rojhilatî", "ermeniya rojavayî"},
scripts = {"Armn", "Brai"},
ancestors = {"axm"},
translit_module = "Armn-translit",
override_translit = true,
sort_key = {
from = {"ու", "և", "եւ"},
to = {"ւ", "եվ", "եվ"}},
entry_name = {
from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"},
to = {"", "", "", "", "եւ", "յ", "ի", "է"}} ,
}
m["hz"] = {
"hereroyî",
"Q33315",
"bnt-swb",
scripts = Latn,
}
m["ia"] = {
"înterlîngua",
"Q35934",
"art",
scripts = Latn,
}
m["id"] = {
"endonezyayî",
"Q9240",
"poz-mly",
scripts = Latn,
ancestors = {"ms"},
}
m["ie"] = {
"înterlîngue",
"Q35850",
"art",
aliases = {"oksîdentalî"},
scripts = Latn,
}
m["ig"] = {
"îgboyî",
"Q33578",
"alv-igb",
scripts = Latn,
}
m["ii"] = {
"yiyiya siçuwayî",
"Q34235",
"tbq-lol",
scripts = {"Yiii"},
}
m["ik"] = {
"înupîakî",
"Q27183",
"esx-inu",
scripts = Latn,
}
m["io"] = {
"îdoyî",
"Q35224",
"art",
scripts = Latn,
}
m["is"] = {
"îzlendî",
"Q294",
"gmq",
scripts = Latn,
ancestors = {"non"},
}
m["it"] = {
"îtalî",
"Q652",
"roa-itd",
scripts = Latn,
sort_key = {
from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"},
to = {"a" , "e" , "i" , "o" , "u" }} ,
}
m["iu"] = {
"înuîtî",
"Q29921",
"esx-inu",
scripts = {"Cans", "Latn"},
}
m["ja"] = {
"japonî",
"Q5287",
"jpx",
aliases = {"japoniya nûjen"},
scripts = {"Jpan", "Brai"},
ancestors = {"ojp"},
--[=[
-- Handled by jsort function in [[Module:ja]].
sort_key = {
from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"},
to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}},
--]=]
}
m["jv"] = {
"javayî",
"Q33549",
"poz-sus",
scripts = {"Latn", "Java"},
ancestors = {"kaw"},
link_tr = true,
}
-- Georgian.
m["ka"] = {
    "gurcî",
    "Q8108",
    "ccs-gzn",
    scripts = {"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian
    ancestors = {"oge"},
    translit_module = "Geor-translit",
    override_translit = true,
    -- Page titles: drop the combining circumflex. The named constant is used
    -- instead of a raw combining-mark literal, which is invisible in most
    -- editors and easy to corrupt when the source is copied.
    entry_name = {
        from = {CIRC},
        to = {""},
    },
}
m["kg"] = {
"kîkongoyî",
"Q33702",
"bnt-kng",
aliases = {"Kongoyî"},
scripts = Latn,
}
m["ki"] = {
"kîkûyûyî",
"Q33587",
"bnt-kka",
scripts = Latn,
}
m["kj"] = {
"kwanyamayî",
"Q1405077",
"bnt-ova",
scripts = Latn,
}
m["kk"] = {
"qazaxî",
"Q9252",
"trk-kno",
scripts = {"Cyrl", "Latn", "kk-Arab"},
translit_module = "kk-translit",
override_translit = true,
}
m["kl"] = {
"kalalîsûtî",
"Q25355",
"esx-inu",
aliases = {"Gronlandî"},
scripts = Latn,
}
m["km"] = {
"ximêrî",
"Q9205",
"mkh-kmr",
aliases = {"kamboçyayî"},
scripts = {"Khmr"},
ancestors = {"mkh-mkm"},
translit_module = "km-translit",
}
m["kn"] = {
"kannadayî",
"Q33673",
"dra",
scripts = {"Knda"},
ancestors = {"dra-mkn"},
translit_module = "kn-translit",
}
m["ko"] = {
"koreyî",
"Q9176",
"qfa-kor",
aliases = {"koreyiya nûjen"},
scripts = {"Kore", "Brai"},
ancestors = {"okm"},
translit_module = "ko-translit",
}
m["kr"] = {
"kanuriyî",
"Q36094",
"ssa-sah",
scripts = LatnArab,
sort_key = {
from = {"ny", "ǝ", "sh"},
to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
entry_name = {
from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE},
to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}},
}
-- Kashmiri.
-- The three positional fields are kept together at the top, as in every other
-- record. Lua numbers positional fields in order of appearance regardless of
-- named fields placed between them, so the resulting table is unchanged.
m["ks"] = {
    "keşmîrî",
    "Q33552",
    "inc-dar",
    aliases = {"koşurî"},
    scripts = {"ks-Arab", "Deva", "Shrd", "Latn"},
    ancestors = {"inc-dar-pro"},
    translit_module = "translit-redirect",
}
-- Kurmanji (Northern Kurdish).
m["ku"] = { -- The code "ku" is being removed; see the discussion at [[Wîkîferheng:Dîwan/2023#Koda "ku" tê jêbirin]].
"kurmancî",
"Q36163",
"ira-ku",
aliases = {"kurdiya bakûr", "kurdiya jorîn"},
scripts = {"Latn", "ku-Arab", "Armn", "Cyrl"},
ancestors = {"ku-pro"},
translit_module = "translit-redirect",
}
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT
m["kw"] = {
"kornî",
"Q25289",
"cel-bry",
scripts = Latn,
ancestors = {"cnx"},
}
m["ky"] = {
"kirgizî",
"Q9255",
"trk-kip",
scripts = {"Cyrl", "Latn", "Arab"},
translit_module = "ky-translit",
override_translit = true,
}
-- Latin.
m["la"] = {
    "latînî",
    "Q397",
    "itc",
    scripts = Latn,
    ancestors = {"itc-ola"},
    -- Combining marks listed here (macron, breve, diaeresis, double inverted
    -- breve) are the ones handed to `remove_diacritics` for page titles.
    entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE},
    -- Character set considered standard for Latin. The ligatures are spelled
    -- out as Æ æ Œ œ: the previous text had a single CJK character (U+631C)
    -- in place of "æŒœ", which is what the Windows-1252 bytes E6 8C 9C become
    -- when they are misread as UTF-8.
    standardChars = "A-Za-zÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. PUNCTUATION,
}
m["lb"] = {
"luksembûrgî",
"Q9051",
"gmw",
scripts = Latn,
ancestors = {"gmh"},
}
m["lg"] = {
"lugandayî",
"Q33368",
"bnt-nyg",
aliases = {"Gandayî", "Olugandayî"},
scripts = Latn,
entry_name = {
from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" },
to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}},
sort_key = {
from = {"ŋ"},
to = {"n"}} ,
}
m["li"] = {
"lîmbûrgî",
"Q102172",
"gmw",
aliases = {"limburganî"},
scripts = Latn,
ancestors = {"dum"},
}
m["ln"] = {
"lingalayî",
"Q36217",
"bnt-bmo",
aliases = {"ngalayî"},
scripts = Latn,
}
m["lo"] = {
"lawsî",
"Q9211",
"tai-swe",
scripts = {"Laoo"},
translit_module = "lo-translit",
sort_key = {
from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"},
to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}},
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION,
}
m["lt"] = {
"lîtwanî",
"Q9083",
"bat",
scripts = Latn,
ancestors = {"olt"},
entry_name = {
from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE},
to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} ,
}
m["lu"] = {
"luba-katangayî",
"Q36157",
"bnt-lub",
scripts = Latn,
}
-- Latvian.
m["lv"] = {
"latviyayî",
"Q9078",
"bat",
aliases = {"letî"},
scripts = Latn,
entry_name = {
-- Converts vowels carrying tone marks into vowels either with or without
-- a macron. There must be no macron when the vowel is part of a diphthong,
-- including resonant diphthongs (e.g. pìrksts -> pirksts, not #pīrksts).
-- The substitutions below rely on being applied in list order:
--   1. rewrite vowel + tone mark as vowel + TILDE (decomposed); Ȩ/ȩ, the
--      O vowels and N lose their mark outright;
--   2. drop the TILDE inside diphthongs (vowel + l/r/n/m/u/i before a
--      non-vowel or at the end of the word, and in "ie");
--   3. turn the remaining vowel + TILDE sequences into macroned vowels;
--   4. delete any TILDE that is still left.
-- Already-macroned vowels are left alone: both e.g. ar and ār occur before
-- consonants. FIXME: This still might not be sufficient.
from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE},
to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}},
}
m["mg"] = {
"malagasî",
"Q7930",
"poz-bre",
scripts = Latn,
}
m["mh"] = {
"marşalî",
"Q36280",
"poz-mic",
scripts = Latn,
sort_key = {
from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" },
to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} ,
}
m["mi"] = {
"maorî",
"Q36451",
"poz-pep",
aliases = {"Māori"},
scripts = Latn,
}
m["mk"] = {
"makedonî",
"Q9296",
"zls",
scripts = Cyrl,
translit_module = "mk-translit",
entry_name = {
from = {ACUTE},
to = {}},
}
m["ml"] = {
"malayalamî",
"Q36236",
"dra",
scripts = {"Mlym"},
translit_module = "ml-translit",
override_translit = true,
}
m["mn"] = {
"mongolî",
"Q9246",
"xgn",
scripts = {"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion
ancestors = {"cmg"},
translit_module = "mn-translit",
override_translit = true,
}
-- "mo" IS TREATED AS "ro", SEE WT:LT
m["mr"] = {
"maratî",
"Q1571",
"inc-sou",
scripts = {"Deva", "Modi"},
ancestors = {"omr"},
translit_module = "mr-translit",
}
m["ms"] = {
"malezî",
"Q9237",
"poz-mly",
aliases = {"malezyayî", "maleziya standard"},
scripts = {"Latn", "ms-Arab"},
}
m["mt"] = {
"maltayî",
"Q9166",
"sem-arb",
scripts = Latn,
ancestors = {"sqr"},
}
m["my"] = {
"burmayî",
"Q9228",
"tbq-brm",
aliases = {"mîyanmarî"},
scripts = {"Mymr"},
ancestors = {"obr"},
translit_module = "my-translit",
override_translit = true,
sort_key = {
from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}},
}
m["na"] = {
"nawrûyî",
"Q13307",
"poz-mic",
scripts = Latn,
}
m["nb"] = {
"norweciya bokmålî",
"Q25167",
"gmq",
aliases = {"bokmål"},
scripts = Latn,
ancestors = {"gmq-mno"},
wikimedia_codes = {"no"},
}
m["nd"] = {
"ndebeliya bakurî",
"Q35613",
"bnt-ngu",
scripts = Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["ne"] = {
"nepalî",
"Q33823",
"inc-pah",
scripts = {"Deva", "Newa"},
translit_module = "ne-translit",
}
m["ng"] = {
"ndongayî",
"Q33900",
"bnt-ova",
scripts = Latn,
}
m["nl"] = {
"holendî",
"Q7411",
"gmw",
scripts = Latn,
ancestors = {"dum"},
sort_key = {
from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["nn"] = {
"norweciya nînorskî",
"Q25164",
"gmq",
scripts = Latn,
ancestors = {"gmq-mno"},
}
m["no"] = {
"norwecî",
"Q9043",
"gmq",
scripts = Latn,
ancestors = {"gmq-mno"},
}
m["nr"] = {
"ndebeliya başûrî",
"Q36785",
"bnt-ngu",
scripts = Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["nv"] = {
"navajoyî",
"Q13310",
"apa",
aliases = {"navahoyî"},
scripts = {"nv-Latn"},
sort_key = {
from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE},
to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l
}
m["ny"] = {
"çîçewayî",
"Q33273",
"bnt-nys",
scripts = Latn,
entry_name = {
from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" },
to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}},
sort_key = {
from = {"ng'"},
to = {"ng"}} ,
}
m["oc"] = {
"oksîtanî",
"Q14185",
"roa",
-- don't list varieties here that are in [[Module:etymology languages/data]]
scripts = {"Latn", "Hebr"},
ancestors = {"pro"},
sort_key = {
from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "%1h" }} ,
}
m["oj"] = {
"ojibweyî",
"Q33875",
"alg",
aliases = {"ojibwayî"},
scripts = {"Cans", "Latn"},
sort_key = {
from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
to = {"a~", "h~", "i~", "o~", "s~", "z~"}} ,
}
m["om"] = {
"oromoyî",
"Q33864",
"cus",
scripts = {"Latn", "Ethi"},
}
m["or"] = {
"oriyayî",
"Q33810",
"inc-eas",
scripts = {"Orya"},
ancestors = {"inc-mor"},
translit_module = "or-translit",
}
m["os"] = {
"osetî",
"Q33968",
"xsc",
scripts = {"Cyrl", "Geor", "Latn"},
ancestors = {"oos"},
translit_module = "os-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["pa"] = {
"puncabî",
"Q58635",
"inc-pan",
scripts = {"Guru", "pa-Arab"},
ancestors = {"inc-tak"},
translit_module = "translit-redirect",
entry_name = {
from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652)},
to = {}} ,
}
m["pi"] = {
"palî",
"Q36727",
"pra",
scripts = {"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"},
ancestors = {"sa"},
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "[ṇñṅ]", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a", "i", "u", "d", "l", "m" , "n" , "t", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
}
m["pl"] = {
"polonî",
"Q809",
"zlw-lch",
scripts = Latn,
ancestors = {"zlw-opl"},
sort_key = {
from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"},
to = {
"a" .. u(0x10FFFF),
"c" .. u(0x10FFFF),
"e" .. u(0x10FFFF),
"l" .. u(0x10FFFF),
"n" .. u(0x10FFFF),
"o" .. u(0x10FFFF),
"s" .. u(0x10FFFF),
"z" .. u(0x10FFFF),
"z" .. u(0x10FFFE)}} ,
}
m["ps"] = {
"peştûyî",
"Q58680",
"ira-pat",
aliases = {"peştû", "afganî"},
varieties = {"peştûya navendî", "peştûya bakurî", "peştûya başûrî"},
scripts = {"ps-Arab"},
ancestors = {"ira-pat-pro"},
}
m["pt"] = {
"portugalî",
"Q5146",
"roa-ibe",
aliases = {"portugaliya nûjen"},
scripts = {"Latn", "Brai"},
ancestors = {"roa-opt"},
sort_key = {
from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
}
m["qu"] = {
"keçwayî",
"Q5218",
"qwe",
scripts = Latn,
}
m["rm"] = {
"romancî",
"Q13199",
"roa-rhe",
aliases = {"romanşî"},
scripts = Latn,
}
m["ro"] = {
"romanyayî",
"Q7913",
"roa-eas",
scripts = {"Latn", "Cyrl"},
sort_key = {
from = {"ă" , "â" , "î" , "ș" , "ț" },
to = {"a~", "a~~", "i~", "s~", "t~"}},
}
-- Russian.
m["ru"] = {
    "rusî",
    "Q7737",
    "zle",
    scripts = {"Cyrl", "Brai"},
    translit_module = "ru-translit",
    -- Sort key: "ё" becomes "е" followed by the highest code point, the same
    -- device m["pl"] uses for its accented letters. Built with the file-wide
    -- `u` alias rather than spelling out mw.ustring.char again.
    sort_key = {
        from = {"ё"},
        to = {"е" .. u(0x10FFFF)},
    },
    -- Page titles: map the precomposed accented letters to their plain
    -- counterparts; the trailing combining grave/acute/diaeresis have no `to`
    -- counterpart and are therefore removed.
    entry_name = {
        from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER},
        to = {"Е", "е", "И", "и"},
    },
    standardChars = "ЁА-яё0-9—" .. PUNCTUATION,
}
-- Kinyarwanda.
m["rw"] = {
    "kînyarwandayî",
    "Q3217514",
    "bnt-glb",
    scripts = Latn,
    -- Page titles: strip tone/length marks from vowels.
    -- The character classes hold only single precomposed letters. Macron +
    -- acute on a/i/u has no precomposed form, so it is a macroned letter
    -- followed by a combining acute; writing that sequence inside a class
    -- (as before) put a bare U+0301 in the class, which was then replaced by
    -- a vowel (e.g. "ā́" -> "aa"). Here the class folds the macroned letter
    -- and the trailing ACUTE, having no `to` counterpart, is removed -- the
    -- same arrangement m["nd"], m["nr"], m["ss"] and m["st"] use.
    entry_name = {
        from = {"[áāâǎ]", "[éēêěḗ]", "[íīîǐ]", "[óōôǒṓ]", "[úūûǔ]", ACUTE},
        to = {"a", "e", "i", "o", "u"},
    },
}
m["sa"] = {
"sanskrîtî",
"Q11059",
"inc-old",
scripts = {"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Mymr", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"},
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "[ṇñṅ]", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a", "i", "u", "d", "l", "m" , "n" , "t", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
translit_module = "translit-redirect",
}
m["sc"] = {
"sardînî",
"Q33976",
"roa",
-- don't list varieties here that are in [[Module:etymology languages/data]]
scripts = Latn,
}
m["sd"] = {
"sindhî",
"Q33997",
"inc-snd",
scripts = {"sd-Arab", "Deva", "Sind", "Khoj"},
entry_name = {
from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)},
to = {u(0x0627)}},
ancestors = {"inc-vra"},
}
m["se"] = {
"samiya bakurî",
"Q33947",
"smi",
scripts = Latn,
entry_name = {
from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"},
to = {"a", "e" , "i", "o" , "u"} },
sort_key = {
from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" },
to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} },
standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION,
}
m["sg"] = {
"sangoyî",
"Q33954",
"crp",
scripts = Latn,
ancestors = {"ngb"},
}
m["sh"] = {
"sirboxirwatî",
"Q9301",
"zls",
-- don't list varieties here that are in [[Module:etymology languages/data]]
varieties = {"bosnî", "kroatî", "sirbî"},
scripts = {"Latn", "Cyrl", "Glag"},
entry_name = {
from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }},
wikimedia_codes = {"sh", "bs", "hr", "sr"},
}
m["si"] = {
"kîngalî",
"Q13267",
"inc-ins",
aliases = {"Singhalî", "Sinhalayî"},
scripts = {"Sinh"},
ancestors = {"elu-prk"},
translit_module = "si-translit",
override_translit = true,
}
m["sk"] = {
"slovakî",
"Q9058",
"zlw",
scripts = Latn,
sort_key = {
from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. "]"},
to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} ,
}
m["sl"] = {
"slovenî",
"Q9063",
"zls",
aliases = {"Slovenyayî"},
scripts = Latn,
entry_name = {
from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"},
},
sort_key = {
from = {"č" , "š" , "ž" },
to = {"c²", "s²", "z²"},
},
}
m["sm"] = {
"samoayî",
"Q34011",
"poz-pnp",
scripts = Latn,
}
m["sn"] = {
"şonayî",
"Q34004",
"bnt-sho",
scripts = Latn,
entry_name = {remove_diacritics = ACUTE},
}
m["so"] = {
"somalî",
"Q13275",
"cus",
scripts = {"Latn", "Arab", "Osma"},
entry_name = {
from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} ,
}
-- "sq": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["sq"] = {
    "albanî",
    "Q8748",
    "sqj",
    -- don't list varieties here that are in [[Module:etymology languages/data]]
    aliases = {"arnawidî"},
    scripts = {"Latn", "Grek", "Elba"},
    sort_key = {
        -- Fourteen patterns and fourteen replacements, lower/upper case in
        -- alternation.
        from = {"[âãä]", "[ÂÃÄ]", "[êẽë]", "[ÊẼË]", "ĩ", "Ĩ", "õ", "Õ", "ũ", "Ũ", "ỹ", "Ỹ", "ç", "Ç"},
        to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "y", "Y", "c", "C"},
    },
    -- Going by the field name, ACUTE is the diacritic to drop from entry names.
    entry_name = {remove_diacritics = ACUTE},
}
-- "ss": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ss"] = {
    "swazî",
    "Q34014",
    "bnt-ngu",
    scripts = Latn,
    aliases = {"Swatî"},
    entry_name = {
        -- Seven letter patterns have replacements; the five combining-mark
        -- constants after them have none listed (presumably removed --
        -- confirm with the consumer).
        from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
        to = {"a", "e", "i", "o", "u", "m", "n"},
    },
}
-- "st": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["st"] = {
    "sotoyiya başûr",
    "Q34340",
    "bnt-sts",
    scripts = Latn,
    aliases = {"sotoyî"},
    entry_name = {
        -- Seven letter patterns have replacements; the five combining-mark
        -- constants after them have none listed (presumably removed --
        -- confirm with the consumer).
        from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
        to = {"a", "e", "i", "o", "u", "m", "n"},
    },
}
-- "su": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["su"] = {
    "sundanî",
    "Q34002",
    "poz-msa",
    translit_module = "su-translit", -- names a transliteration module
    scripts = {"Latn", "Sund"},
}
-- "sv": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["sv"] = {
    "swêdî",
    "Q9027",
    "gmq",
    ancestors = {"gmq-osw"}, -- a single ancestor code
    scripts = Latn,
}
-- "sw": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["sw"] = {
    "swahîlî",
    "Q7838",
    "bnt-swh",
    scripts = LatnArab,
    sort_key = {
        -- Two patterns, one replacement: "ng'" pairs with "ngz"; the
        -- start-anchored hyphen pattern "^-" has no replacement listed
        -- (presumably removed -- confirm with the consumer).
        from = {"ng'", "^-"},
        to = {"ngz"},
    },
}
-- "ta": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ta"] = {
    "tamîlî",
    "Q5885",
    "dra",
    scripts = {"Taml"},
    ancestors = {"oty"},
    -- Names a transliteration module; override_translit is set alongside it.
    override_translit = true,
    translit_module = "ta-translit",
}
-- "te": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["te"] = {
    "telûgûyî",
    "Q8097",
    "dra",
    scripts = {"Telu"},
    -- Names a transliteration module; override_translit is set alongside it.
    override_translit = true,
    translit_module = "te-translit",
}
-- "tg": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["tg"] = {
    "tacikî",
    "Q9260",
    "ira-swi",
    scripts = {"Cyrl", "fa-Arab", "Latn"},
    ancestors = {"pal"}, -- same as "fa", see WT:T:AFA
    translit_module = "tg-translit",
    override_translit = true,
    entry_name = {
        -- One pattern (ACUTE) and an empty replacement list.
        from = {ACUTE},
        to = {},
    },
    sort_key = {
        -- Two patterns and two replacements, upper case first.
        from = {"Ё", "ё"},
        to = {"Е", "е"},
    },
}
-- "th": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["th"] = {
    "tayî",
    "Q9217",
    "tai-swe",
    scripts = {"Thai", "Brai"},
    sort_key = {
        -- Three patterns, three replacements. The first two pair with "";
        -- the third has two captures and its replacement "%2%1" emits the
        -- second capture before the first.
        from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
        to = {"", "", "%2%1"},
    },
    translit_module = "th-translit",
}
-- "ti": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ti"] = {
    "tigrînî",
    "Q34124",
    "sem-eth",
    scripts = {"Ethi"},
    aliases = {"tigrînyayî"},
    translit_module = "Ethi-translit", -- names a transliteration module
}
-- "tk": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["tk"] = {
    "tirkmenî",
    "Q9267",
    "trk-ogz",
    scripts = {"Latn", "Cyrl"},
    ancestors = {"trk-ogz-pro"},
    entry_name = {
        -- Nine patterns, nine replacements; the last pattern is the MACRON
        -- constant and pairs with the empty string.
        from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON},
        to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""},
    },
}
-- "tl": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["tl"] = {
    "tagalogî",
    "Q34057",
    "phi",
    scripts = {"Latn", "Tglg"},
    entry_name = {
        -- Five vowel sets have replacements; the three combining-mark
        -- constants after them have none listed (presumably removed --
        -- confirm with the consumer).
        from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC},
        to = {"a", "e", "i", "o", "u"},
    },
}
-- "tn": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["tn"] = {
    "tswanayî",
    "Q34137",
    "bnt-sts",
    scripts = Latn,
    aliases = {"Setswanayî"},
}
-- "to": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["to"] = {
    "tongî",
    "Q34094",
    "poz-pol",
    scripts = Latn,
    entry_name = {
        -- Acute-accented vowels plus the ACUTE constant; six items each side.
        from = {"á", "é", "í", "ó", "ú", ACUTE},
        to = {"a", "e", "i", "o", "u", ""},
    },
    sort_key = {
        -- Macron vowels plus the MACRON constant; six items each side.
        from = {"ā", "ē", "ī", "ō", "ū", MACRON},
        to = {"a", "e", "i", "o", "u", ""},
    },
}
-- "tr": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["tr"] = {
    "tirkî",
    "Q256",
    "trk-ogz",
    ancestors = {"ota"}, -- a single ancestor code
    scripts = Latn,
}
-- "ts": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ts"] = {
    "tsongayî",
    "Q34327",
    "bnt-tsr",
    scripts = Latn, -- shared script list declared near the top of the file
}
-- "tt": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["tt"] = {
    "teterî",
    "Q25285",
    "trk-kbu",
    scripts = {"Cyrl", "Latn", "tt-Arab"},
    -- Names a transliteration module; override_translit is set alongside it.
    override_translit = true,
    translit_module = "tt-translit",
}
-- "tw" IS TREATED AS "ak", SEE WT:LT
-- "ty": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ty"] = {
    "tahîtî",
    "Q34128",
    "poz-pep",
    scripts = Latn, -- shared script list declared near the top of the file
}
-- "ug": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ug"] = {
    "oygurî",
    "Q13263",
    "trk-kar",
    scripts = {"ug-Arab", "Latn", "Cyrl"},
    aliases = {"uygurî"},
    ancestors = {"chg"},
    -- Names a transliteration module; override_translit is set alongside it.
    override_translit = true,
    translit_module = "ug-translit",
}
-- "uk": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["uk"] = {
    "ûkraynî",
    "Q8798",
    "zle",
    scripts = Cyrl,
    ancestors = {"orv"},
    translit_module = "uk-translit",
    -- Character-set string with the shared PUNCTUATION constant appended.
    standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION,
    entry_name = {
        -- Four letter patterns have replacements; GRAVE and ACUTE have none
        -- listed (presumably removed -- confirm with the consumer).
        from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
        to = {"Е", "е", "И", "и"},
    },
}
-- "ur": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ur"] = {
    "urdûyî",
    "Q1617",
    "inc-hnd",
    scripts = {"ur-Arab"},
    ancestors = {"inc-ohi"},
    -- NOTE(review): the module name carries a "pa-Arab" prefix although this
    -- is the "ur" entry -- confirm that this is intentional.
    translit_module = "pa-Arab-translit",
    entry_name = {
        -- Eight single characters, code points U+064B through U+0652
        -- (Arabic combining marks), with an empty replacement list.
        from = {
            u(0x064B), u(0x064C), u(0x064D), u(0x064E),
            u(0x064F), u(0x0650), u(0x0651), u(0x0652),
        },
        to = {},
    },
}
-- "uz": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["uz"] = {
    "ozbekî",
    "Q9264",
    "trk-kar",
    scripts = {"Latn", "Cyrl", "fa-Arab"},
    ancestors = {"chg"},
    varieties = {"ozbekiya bakurî", "ozbekiya başûrî"},
}
-- "ve": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["ve"] = {
    "vendayî",
    "Q32704",
    "bnt-bso",
    scripts = Latn, -- shared script list declared near the top of the file
}
-- "vi": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["vi"] = {
    "viyetnamî",
    "Q9199",
    "mkh-vie",
    scripts = {"Latn", "Hani"},
    ancestors = {"mkh-mvi"},
    -- A string here rather than a from/to table; presumably a module name --
    -- confirm with the consumer.
    sort_key = "vi-sortkey",
}
-- "vo": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["vo"] = {
    "volapûkî",
    "Q36986",
    "art",
    scripts = Latn, -- shared script list declared near the top of the file
}
-- "wa": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["wa"] = {
    "walonî",
    "Q34219",
    "roa-oil",
    scripts = Latn,
    ancestors = {"fro"},
    sort_key = {
        -- Eight patterns, seven replacements: the trailing apostrophe
        -- pattern has no replacement listed (presumably removed -- confirm
        -- with the consumer).
        from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"},
        to = {"a", "e", "i", "o", "u", "y", "c"},
    },
}
-- "wo": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["wo"] = {
    "wolofî",
    "Q34257",
    "alv-fwo",
    scripts = LatnArab, -- shared two-script list declared near the top of the file
}
-- "xh": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["xh"] = {
    "xosayî",
    "Q13218",
    "bnt-ngu",
    scripts = Latn,
    entry_name = {
        -- Seven letter patterns have replacements; the five combining-mark
        -- constants after them have none listed (presumably removed --
        -- confirm with the consumer).
        from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
        to = {"a", "e", "i", "o", "u", "m", "n"},
    },
}
-- "yi": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
-- The from/to lines hold right-to-left text with combining points; edit them
-- with care, since editors may display the items in a different visual order.
m["yi"] = {
"yidîşî",
"Q8641",
"gmw",
scripts = {"Hebr"},
ancestors = {"gmh"},
sort_key = {
-- Six patterns paired with six replacements.
from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"},
to = {"א", "ב", "ו", "י", "יי", "פ"}} ,
translit_module = "yi-translit",
}
-- "yo": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["yo"] = {
    "yorubayî",
    "Q34311",
    "alv-yor",
    scripts = Latn,
    -- Going by the field name, the three concatenated marks are the
    -- diacritics to drop from entry names.
    entry_name = {remove_diacritics = ACUTE .. GRAVE .. MACRON},
    sort_key = {
        -- Each of the three patterns is paired with a base letter plus "~".
        from = {"ẹ", "ọ", "gb"},
        to = {"e~", "o~", "g~"},
    },
}
-- "za": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["za"] = {
    "zhuangî",
    "Q13216",
    "tai",
    scripts = {"Latn", "Hani"},
    sort_key = {
        -- The punctuation class "%p" is paired with the empty string.
        from = {"%p"},
        to = {""},
    },
}
-- "zh": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["zh"] = {
    "çînî",
    "Q7850",
    "zhx",
    scripts = {"Hani", "Brai", "Nshu"},
    ancestors = {"ltc"},
    -- A string here rather than a from/to table; presumably a module name --
    -- confirm with the consumer.
    sort_key = "zh-sortkey",
}
-- "zu": positional values are presumably display name, Wikidata item ID and
-- family code -- confirm against the consumer of this table.
m["zu"] = {
    "zuluyî",
    "Q10179",
    "bnt-ngu",
    scripts = Latn,
    entry_name = {
        -- Seven letter patterns have replacements; the five combining-mark
        -- constants after them have none listed (presumably removed --
        -- confirm with the consumer).
        from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
        to = {"a", "e", "i", "o", "u", "m", "n"},
    },
}
-- Module result: the table `m`, holding one entry per language code key.
return m