1209 lines
34 KiB
Perl
1209 lines
34 KiB
Perl
#! /bin/false
|
|
|
|
# vim: set autoindent shiftwidth=4 tabstop=4:
|
|
|
|
# Portable methods for locale handling.
|
|
# Copyright (C) 2002-2017 Guido Flohr <guido.flohr@cantanea.com>,
|
|
# all rights reserved.
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package Locale::Util;
|
|
|
|
use strict;
|
|
|
|
use constant DEBUG => 0;
|
|
|
|
use base qw (Exporter);
|
|
|
|
use vars qw (@EXPORT_OK);
|
|
|
|
@EXPORT_OK = qw (parse_http_accept_language
|
|
parse_http_accept_charset
|
|
set_locale set_locale_cache get_locale_cache
|
|
web_set_locale);
|
|
|
|
# The following list maps languages to a rough guess of the country that
|
|
# is most likely to be meant if no locale info for the country alone is
|
|
# found. I have assembled the list to the best of my knowledge, preferring
|
|
# the country that has the language as its official language, and in doubt
|
|
# listing the country that has the most speakers of that language. Corrections
|
|
# are welcome.
|
|
use constant LANG2COUNTRY => {
|
|
aa => 'ET', # Afar => Ethiopia
|
|
ab => 'AB', # Abkhazian => Georgia
|
|
# ae => '??', # Avestan => ??, Iran?
|
|
af => 'za', # Afrikaans => South Africa
|
|
am => 'ET', # Amharic => Ethiopia
|
|
ar => 'EG', # Arabic => Egypt
|
|
as => 'IN', # Assamese => India
|
|
ay => 'BO', # Aymara => Bolivia
|
|
az => 'AZ', # Azerbaijani => Azerbaijan
|
|
ba => 'RU', # Bashkir => Russia
|
|
be => 'BY', # Belarusian => Belarus
|
|
bg => 'BG', # Bulgarian => Bulgaria
|
|
bh => 'IN', # Bihari => India
|
|
bi => 'VU', # Bislama => Vanuatu
|
|
bn => 'BD', # Bengali => Bangladesh
|
|
bo => 'CN', # Tibetan => China
|
|
br => 'FR', # Breton => France
|
|
bs => 'BA', # Bosnian => Bosnia and Herzegovina
|
|
ca => 'ES', # Catalan => Spain
|
|
ce => 'RU', # Chechen => Russia
|
|
ch => '??', # Chamorro => Guam (or mp?)
|
|
co => 'FR', # Corsican => France
|
|
cs => 'CZ', # Czech => Czech Republic
|
|
cu => 'BG', # Church Slavic => Bulgaria
|
|
cv => 'RU', # Chuvash => Russia
|
|
cy => 'GB', # Welsh => United Kingdom
|
|
da => 'DK', # Danish => Denmark
|
|
de => 'DE', # German => Germany
|
|
dz => 'BT', # Dzongkha => Bhutan
|
|
el => 'GR', # Greek => Greece
|
|
en => 'US', # English => United States
|
|
es => 'ES', # Actually Mexico and the US have more Spanish speakers
|
|
# than Spain. But it can be assumed that they either add
|
|
# the country to their browser settings or will not care
|
|
# to much.
|
|
et => 'EE', # Estonian => Estonia
|
|
fa => 'IR', # Iran, Islamic Republic of
|
|
fi => 'FI', # Finnish => Finland
|
|
fj => 'FJ', # Fijian => Fiji
|
|
fo => 'FO', # Faeroese => Faroe Islands
|
|
fr => 'FR', # French => France
|
|
fy => 'FY', # Frisian => Netherlands
|
|
ga => 'IE', # Irish => Ireland
|
|
gd => 'GB', # Gaelic (Scots) => United Kingdom
|
|
gl => 'ES', # Gallegan => Spain
|
|
gn => 'PY', # Guarani => Paraguay
|
|
gu => 'IN', # Gujarati => IN
|
|
gv => 'GB', # Manx => United Kingdom
|
|
ha => 'NE', # Hausa => Niger (ng?)
|
|
he => 'IL', # Hebrew => Israel
|
|
hi => 'IN', # Hindi => India
|
|
ho => 'PG', # Hiri Motu => Papua New Guinea
|
|
hr => 'HR', # Croatian
|
|
hu => 'HU', # Hungarian => Hungary
|
|
hy => 'AM', # Armenian => Armenia
|
|
hz => 'NA', # Herero => Namibia
|
|
# ia => '??', # Interlingua (aka "latino sine flexione") => ??
|
|
id => 'ID', # Indonesian => Indonesia
|
|
# ie => '??', # Interlingue => ???
|
|
ik => 'US', # Inupiaq => United States
|
|
is => 'IS', # Icelandic => Iceland
|
|
it => 'IT', # Italian => Italy
|
|
iu => 'CA', # Inuktitut => Canada
|
|
iw => 'IL', # Hebrew => Israel
|
|
ja => 'JP', # Japanese => Japan
|
|
jw => 'ID', # Javanese => Indonesia
|
|
ka => 'GE', # Georgian => Georgia
|
|
ki => 'KE', # Kikuyu => Kenya
|
|
kj => 'AO', # Kuanyama => Angola (na?)
|
|
kk => 'KZ', # Kazakh => Kazakhstan
|
|
kl => 'GL', # Kalaallisut => Greenland
|
|
km => 'KH', # Khmer => Cambodia
|
|
kn => 'IN', # Kannada => India
|
|
ko => 'KR', # Korean => Korea, Republic of (more speakers than North Korea)
|
|
ks => 'IN', # Kashmiri => India
|
|
ku => 'TR', # Kurdish => Turkey
|
|
kv => 'RU', # Komi => Russia
|
|
kw => 'GB', # Cornish => United Kingdom
|
|
ky => 'KG', # Kirghyz => Kyrgyzstan
|
|
la => 'VA', # Latin => Holy See (Vatican City State)
|
|
lb => 'LU', # Letzeburgesch => Luxembourg
|
|
ln => 'CG', # Lingala => Republic of the Congo (cd?)
|
|
lo => 'LA', # Lao => Lao People's Democratic Republic
|
|
lt => 'LT', # Lithuanian => Lithuania
|
|
lv => 'LV', # Latvian => Latvia
|
|
mg => 'MG', # Malagasy => Madagascar
|
|
mh => 'MH', # Marshall => Marshall Islands
|
|
mi => 'NZ', # Maori => New Zealand
|
|
mk => 'MK', # Macedonian => Macedonia, the Former Yugoslav Republic of
|
|
ml => 'IN', # Malayalam => India
|
|
mn => 'MN', # Mongolian => Mongolia
|
|
mr => 'IN', # Marathi => India
|
|
ms => 'MY', # Malay => Malaysia (FIXME: not really sure ...)
|
|
mt => 'MT', # Maltese => Malta
|
|
my => 'MM', # Burmese => Myanmar
|
|
na => 'NR', # Nauru => Nauru
|
|
nb => 'NO', # Norwegian Bokmål => Norway
|
|
nd => 'ZA', # Ndebele, North => South Africa
|
|
ne => 'NP', # Nepali => Nepal
|
|
ng => 'NA', # Ndonga => Namibia
|
|
nl => 'NL', # Dutch => Netherlands
|
|
nn => 'NO', # Norwegian Nynorsk => Norway
|
|
no => 'NO', # Norwegian => Norway
|
|
nr => 'ZA', # Ndebele, South => South Africa
|
|
nv => 'US', # Navajo => United States
|
|
ny => 'MW', # Chichewa; Nyanja => Malawi
|
|
oc => 'FR', # Occitan (post 1500) => France
|
|
om => 'ET', # Oromo => Ethiopia
|
|
or => 'IN', # Oriya => India
|
|
os => 'RU', # Ossetian; Ossetic => Russia (FIXME: Or Georgia?)
|
|
pa => 'IN', # Panjabi => India
|
|
pi => 'IN', # Pali => India (FIXME: Or Thailand, Sri Lanka, Myanmar,
|
|
# Cambodia)
|
|
pl => 'PL', # Polish => Poland
|
|
ps => 'PK', # Pushto => Pakistan
|
|
pt => 'PT', # Portuguese => Portugal (following our rules this should
|
|
# actually be Brazil but that would be to unrealistic,
|
|
# people from Brazil set their locale to pt_BR).
|
|
qu => 'PE', # Quechua => Peru
|
|
rm => 'CH', # Rhaeto-Romance => Switzerland
|
|
rn => 'RW', # Rundi => Rwanda
|
|
ro => 'RO', # Romanian => Romania
|
|
ru => 'RU', # Russian => Russia
|
|
rw => 'RW', # Kinyarwanda => Rwanda
|
|
sa => 'IN', # Sanskrit => India
|
|
sc => 'IT', # Sardinian => Italy
|
|
sd => 'IN', # Sindhi => India
|
|
se => 'SE', # Sami => Sweden (Totally unsure here. The Sami languages
|
|
# are also spoken in Norway, Finland and Russia, but the
|
|
# largest part of the area seems to be in Sweden.
|
|
sg => '??', # Sango => Central African Republic
|
|
si => 'LK', # Sinhalese => Sri Lanka
|
|
sk => 'SK', # Slovakian => Slovakia
|
|
sl => 'SI', # Slovenian => Slovenia
|
|
sm => 'WS', # Samoan => Samoa
|
|
sh => 'ZW', # Shona => Zimbabwe (FIXME: Rather Mozambique?)
|
|
so => 'SO', # Somali => Somalia
|
|
sq => 'AL', # Albanian => Albania
|
|
sr => 'YU', # Serbian => Yugoslavia
|
|
ss => '??', # Swati => Swaziland (za?)
|
|
st => 'LS', # Sotho => Lesotho
|
|
su => 'IN', # Sundanese => Indonesia
|
|
sv => 'SE', # Swedish => Sweden
|
|
sw => 'TZ', # Suaheli => Tanzania, United Republic of
|
|
ta => 'LK', # Tamil => Sri Lanka
|
|
te => 'IN', # Telugu => India
|
|
tg => 'TJ', # Tajik => Tajikistan
|
|
th => 'TH', # Thai => Thailand
|
|
ti => 'ER', # Tigrinya => Eritrea
|
|
tk => 'TM', # Turkmen => Turkmenistan
|
|
tl => 'PH', # Tagalog => Philippines
|
|
tn => 'BW', # Tswana => Botswana
|
|
to => 'TO', # Tonga => Tonga
|
|
tr => 'TR', # Turkish => Turkish
|
|
tt => 'RU', # Tatar => Russia
|
|
tw => 'GH', # Twi => Ghana
|
|
ug => 'CN', # Uighur => China
|
|
uk => 'UA', # Ukrainian => Ukraine
|
|
ur => 'PK', # Urdu => Pakistan
|
|
uz => 'UZ', # Uzbek => Uzbekistan
|
|
vi => 'VN', # Vietnamese => Vietnam
|
|
# vo => '??', # Volapuk => Nowhere
|
|
wo => 'SN', # Wolof => Senegal
|
|
xh => 'ZA', # Xhosa => South Africa
|
|
yi => 'IL', # Yiddish => Israel (FIXME: Rather United States?)
|
|
yo => 'NG', # Yoruba => Nigeria
|
|
za => 'CN', # Zhuang => China
|
|
zh => 'CN', # Chinese => China
|
|
zu => 'ZA', # Zulu => South Africa
|
|
};
|
|
|
|
use constant WIN32LANGUAGE => {
|
|
aa => "Afar",
|
|
ab => "Abkhazian",
|
|
ae => "Avestan",
|
|
af => "Afrikaans",
|
|
am => "Amharic",
|
|
ar => "Arabic",
|
|
as => "Assamese",
|
|
ay => "Aymara",
|
|
az => "Azerbaijani",
|
|
ba => "Bashkir",
|
|
be => "Belarusian",
|
|
bg => "Bulgarian",
|
|
bh => "Bihari",
|
|
bi => "Bislama",
|
|
bn => "Bengali",
|
|
bo => "Tibetan",
|
|
br => "Breton",
|
|
bs => "Bosnian",
|
|
ca => "Catalan",
|
|
ce => "Chechen",
|
|
ch => "Chamorro",
|
|
co => "Corsican",
|
|
cs => "Czech",
|
|
cu => "Church Slavic",
|
|
cv => "Chuvash",
|
|
cy => "Welsh",
|
|
da => "Danish",
|
|
de => "German",
|
|
dz => "Dzongkha",
|
|
el => "Greek",
|
|
en => "English",
|
|
eo => "Esperanto",
|
|
es => "Spanish",
|
|
et => "Estonian",
|
|
eu => "Basque",
|
|
fa => "Persian",
|
|
fi => "Finnish",
|
|
fj => "Fijian",
|
|
fo => "Faeroese",
|
|
fr => "French",
|
|
fy => "Frisian",
|
|
ga => "Irish",
|
|
gd => "Gaelic (Scots)",
|
|
gl => "Gallegan",
|
|
gn => "Guarani",
|
|
gu => "Gujarati",
|
|
gv => "Manx",
|
|
ha => "Hausa",
|
|
he => "Hebrew",
|
|
hi => "Hindi",
|
|
ho => "Hiri Motu",
|
|
hr => "Croatian",
|
|
hu => "Hungarian",
|
|
hy => "Armenian",
|
|
hz => "Herero",
|
|
ia => "Interlingua",
|
|
id => "Indonesian",
|
|
ie => "Interlingue",
|
|
ik => "Inupiaq",
|
|
is => "Icelandic",
|
|
it => "Italian",
|
|
iu => "Inuktitut",
|
|
ja => "Japanese",
|
|
jw => "Javanese",
|
|
ka => "Georgian",
|
|
ki => "Kikuyu",
|
|
kj => "Kuanyama",
|
|
kk => "Kazakh",
|
|
kl => "Kalaallisut",
|
|
km => "Khmer",
|
|
kn => "Kannada",
|
|
ko => "Korean",
|
|
ks => "Kashmiri",
|
|
ku => "Kurdish",
|
|
kv => "Komi",
|
|
kw => "Cornish",
|
|
ky => "Kirghiz",
|
|
la => "Latin",
|
|
lb => "Letzeburgesch",
|
|
ln => "Lingala",
|
|
lo => "Lao",
|
|
lt => "Lithuanian",
|
|
lv => "Latvian",
|
|
mg => "Malagasy",
|
|
mh => "Marshall",
|
|
mi => "Maori",
|
|
# Sorry, lads, but that is what M$ calls your language ...
|
|
mk => "FYRO Macedonian",
|
|
ml => "Malayalam",
|
|
mn => "Mongolian",
|
|
mo => "Moldavian",
|
|
mr => "Marathi",
|
|
ms => "Malay",
|
|
mt => "Maltese",
|
|
my => "Burmese",
|
|
na => "Nauru",
|
|
nb => "Norwegian (Bokmål)",
|
|
nd => "Ndebele, North",
|
|
ne => "Nepali",
|
|
ng => "Ndonga",
|
|
nl => "Dutch",
|
|
nn => "Norwegian-Nynorsk",
|
|
no => "Norwegian-Nynorsk",
|
|
nr => "Ndebele, South",
|
|
nv => "Navajo",
|
|
ny => "Chichewa",
|
|
oc => "Occitan (post 1500)",
|
|
om => "Oromo",
|
|
or => "Oriya",
|
|
os => "Ossetian",
|
|
pa => "Panjabi",
|
|
pi => "Pali",
|
|
pl => "Polish",
|
|
ps => "Pushto",
|
|
pt => "Portuguese",
|
|
qu => "Quechua",
|
|
rm => "Rhaeto-Romance",
|
|
rn => "Rundi",
|
|
ro => "Romanian",
|
|
ru => "Russian",
|
|
rw => "Kinyarwanda",
|
|
sa => "Sanskrit",
|
|
sc => "Sardinian",
|
|
sd => "Sindhi",
|
|
se => "Sami",
|
|
sg => "Sango",
|
|
si => "Sinhalese",
|
|
sk => "Slovak",
|
|
sl => "Slovenian",
|
|
sm => "Samoan",
|
|
sn => "Shona",
|
|
so => "Somali",
|
|
sq => "Albanian",
|
|
sr => "Serbian",
|
|
ss => "Swati",
|
|
st => "Sotho",
|
|
su => "Sundanese",
|
|
sv => "Swedish",
|
|
sw => "Swahili",
|
|
ta => "Tamil",
|
|
te => "Telugu",
|
|
tg => "Tajik",
|
|
th => "Thai",
|
|
ti => "Tigrinya",
|
|
tk => "Turkmen",
|
|
tl => "Tagalog",
|
|
tn => "Tswana",
|
|
to => "Tonga",
|
|
tr => "Turkish",
|
|
ts => "Tsonga",
|
|
tt => "Tatar",
|
|
tw => "Twi",
|
|
ug => "Uighur",
|
|
uk => "Ukrainian",
|
|
ur => "Urdu",
|
|
uz => "Uzbek",
|
|
vi => "Vietnamese",
|
|
vo => "Volapuk",
|
|
wo => "Wolof",
|
|
xh => "Xhosa",
|
|
yi => "Yiddish",
|
|
yo => "Yoruba",
|
|
za => "Zhuang",
|
|
zh => "Chinese",
|
|
zu => "Zulu",
|
|
};
|
|
|
|
use constant WIN32COUNTRY => {
|
|
ad => "Andorra",
|
|
ae => "United Arab Emirates",
|
|
af => "Afghanistan",
|
|
ag => "Antigua and Barbuda",
|
|
ai => "Anguilla",
|
|
al => "Albania",
|
|
am => "Armenia",
|
|
an => "Netherlands Antilles",
|
|
ao => "Angola",
|
|
aq => "Antarctica",
|
|
ar => "Argentina",
|
|
as => "American Samoa",
|
|
at => "Austria",
|
|
au => "Australia",
|
|
aw => "Aruba",
|
|
ax => "Aland Islands",
|
|
az => "Azerbaijan",
|
|
ba => "Bosnia and Herzegovina",
|
|
bb => "Barbados",
|
|
bd => "Bangladesh",
|
|
be => "Belgium",
|
|
bf => "Burkina Faso",
|
|
bg => "Bulgaria",
|
|
bh => "Bahrain",
|
|
bi => "Burundi",
|
|
bj => "Benin",
|
|
bm => "Bermuda",
|
|
bn => "Brunei Darussalam",
|
|
bo => "Bolivia",
|
|
br => "Brazil",
|
|
bs => "Bahamas",
|
|
bt => "Bhutan",
|
|
bv => "Bouvet Island",
|
|
bw => "Botswana",
|
|
by => "Belarus",
|
|
bz => "Belize",
|
|
ca => "Canada",
|
|
cc => "Cocos (Keeling) Islands",
|
|
cd => "Congo, The Democratic Republic of the",
|
|
cf => "Central African Republic",
|
|
cg => "Congo",
|
|
ch => "Switzerland",
|
|
ci => "Cote D'Ivoire",
|
|
ck => "Cook Islands",
|
|
cl => "Chile",
|
|
cm => "Cameroon",
|
|
cn => "China",
|
|
co => "Colombia",
|
|
cr => "Costa Rica",
|
|
cs => "Serbia and Montenegro",
|
|
cu => "Cuba",
|
|
cv => "Cape Verde",
|
|
cx => "Christmas Island",
|
|
cy => "Cyprus",
|
|
cz => "Czech Republic",
|
|
de => "Germany",
|
|
dj => "Djibouti",
|
|
dk => "Denmark",
|
|
dm => "Dominica",
|
|
do => "Dominican Republic",
|
|
dz => "Algeria",
|
|
ec => "Ecuador",
|
|
ee => "Estonia",
|
|
eg => "Egypt",
|
|
eh => "Western Sahara",
|
|
er => "Eritrea",
|
|
es => "Spain",
|
|
et => "Ethiopia",
|
|
fi => "Finland",
|
|
fj => "Fiji",
|
|
fk => "Falkland Islands (Malvinas)",
|
|
fm => "Micronesia, Federated States of",
|
|
fo => "Faroe Islands",
|
|
fr => "France",
|
|
fx => "France, Metropolitan",
|
|
ga => "Gabon",
|
|
gb => "United Kingdom",
|
|
gd => "Grenada",
|
|
ge => "Georgia",
|
|
gf => "French Guiana",
|
|
gh => "Ghana",
|
|
gi => "Gibraltar",
|
|
gl => "Greenland",
|
|
gm => "Gambia",
|
|
gn => "Guinea",
|
|
gp => "Guadeloupe",
|
|
gq => "Equatorial Guinea",
|
|
gr => "Greece",
|
|
gs => "South Georgia and the South Sandwich Islands",
|
|
gt => "Guatemala",
|
|
gu => "Guam",
|
|
gw => "Guinea-Bissau",
|
|
gy => "Guyana",
|
|
hk => "Hong Kong",
|
|
hm => "Heard Island and McDonald Islands",
|
|
hn => "Honduras",
|
|
hr => "Croatia",
|
|
ht => "Haiti",
|
|
hu => "Hungary",
|
|
id => "Indonesia",
|
|
ie => "Ireland",
|
|
il => "Israel",
|
|
in => "India",
|
|
io => "British Indian Ocean Territory",
|
|
iq => "Iraq",
|
|
ir => "Iran",
|
|
is => "Iceland",
|
|
it => "Italy",
|
|
jm => "Jamaica",
|
|
jo => "Jordan",
|
|
jp => "Japan",
|
|
ke => "Kenya",
|
|
kg => "Kyrgyzstan",
|
|
kh => "Cambodia",
|
|
ki => "Kiribati",
|
|
km => "Comoros",
|
|
kn => "Saint Kitts and Nevis",
|
|
kp => "North-Korea",
|
|
kr => "Korea",
|
|
kw => "Kuwait",
|
|
ky => "Cayman Islands",
|
|
kz => "Kazakhstan",
|
|
la => "Laos",
|
|
lb => "Lebanon",
|
|
lc => "Saint Lucia",
|
|
li => "Liechtenstein",
|
|
lk => "Sri Lanka",
|
|
lr => "Liberia",
|
|
ls => "Lesotho",
|
|
lt => "Lithuania",
|
|
lu => "Luxembourg",
|
|
lv => "Latvia",
|
|
ly => "Libyan",
|
|
ma => "Morocco",
|
|
mc => "Monaco",
|
|
md => "Moldova",
|
|
mg => "Madagascar",
|
|
mh => "Marshall Islands",
|
|
mk => "Former Yugoslav Republic of Macedonia",
|
|
ml => "Mali",
|
|
mm => "Myanmar",
|
|
mn => "Mongolia",
|
|
mo => "Macao",
|
|
mp => "Northern Mariana Islands",
|
|
mq => "Martinique",
|
|
mr => "Mauritania",
|
|
ms => "Montserrat",
|
|
mt => "Malta",
|
|
mu => "Mauritius",
|
|
mv => "Maldives",
|
|
mw => "Malawi",
|
|
mx => "Mexico",
|
|
my => "Malaysia",
|
|
mz => "Mozambique",
|
|
na => "Namibia",
|
|
nc => "New Caledonia",
|
|
ne => "Niger",
|
|
nf => "Norfolk Island",
|
|
ng => "Nigeria",
|
|
ni => "Nicaragua",
|
|
nl => "Netherlands",
|
|
no => "Norway",
|
|
np => "Nepal",
|
|
nr => "Nauru",
|
|
nu => "Niue",
|
|
nz => "New Zealand",
|
|
om => "Oman",
|
|
pa => "Panama",
|
|
pe => "Peru",
|
|
pf => "French Polynesia",
|
|
pg => "Papua New Guinea",
|
|
ph => "Philippines",
|
|
pk => "Pakistan",
|
|
pl => "Poland",
|
|
pm => "Saint Pierre and Miquelon",
|
|
pn => "Pitcairn",
|
|
pr => "Puerto Rico",
|
|
ps => "Palestinian Territory, Occupied",
|
|
pt => "Portugal",
|
|
pw => "Palau",
|
|
py => "Paraguay",
|
|
qa => "Qatar",
|
|
re => "Reunion",
|
|
ro => "Romania",
|
|
ru => "Russian Federation",
|
|
rw => "Rwanda",
|
|
sa => "Saudi Arabia",
|
|
sb => "Solomon Islands",
|
|
sc => "Seychelles",
|
|
sd => "Sudan",
|
|
se => "Sweden",
|
|
sg => "Singapore",
|
|
sh => "Saint Helena",
|
|
si => "Slovenia",
|
|
sj => "Svalbard and Jan Mayen",
|
|
sk => "Slovakia",
|
|
sl => "Sierra Leone",
|
|
sm => "San Marino",
|
|
sn => "Senegal",
|
|
so => "Somalia",
|
|
sr => "Suriname",
|
|
st => "Sao Tome and Principe",
|
|
sv => "El Salvador",
|
|
sy => "Syrian Arab Republic",
|
|
sz => "Swaziland",
|
|
tc => "Turks and Caicos Islands",
|
|
td => "Chad",
|
|
tf => "French Southern Territories",
|
|
tg => "Togo",
|
|
th => "Thailand",
|
|
tj => "Tajikistan",
|
|
tk => "Tokelau",
|
|
tl => "Timor-Leste",
|
|
tm => "Turkmenistan",
|
|
tn => "Tunisia",
|
|
to => "Tonga",
|
|
tr => "Turkey",
|
|
tt => "Trinidad and Tobago",
|
|
tv => "Tuvalu",
|
|
tw => "Taiwan, Province of China",
|
|
tz => "Tanzania, United Republic of",
|
|
ua => "Ukraine",
|
|
ug => "Uganda",
|
|
um => "United States Minor Outlying Islands",
|
|
us => "United States",
|
|
uy => "Uruguay",
|
|
uz => "Uzbekistan",
|
|
va => "Holy See (Vatican City State)",
|
|
vc => "Saint Vincent and the Grenadines",
|
|
ve => "Venezuela",
|
|
vg => "Virgin Islands, British",
|
|
vi => "Virgin Islands, U.S.",
|
|
vn => "Vietnam",
|
|
vu => "Vanuatu",
|
|
wf => "Wallis and Futuna",
|
|
ws => "Samoa",
|
|
ye => "Yemen",
|
|
yt => "Mayotte",
|
|
za => "South Africa",
|
|
zm => "Zambia",
|
|
zw => "Zimbabwe",
|
|
};
|
|
|
|
my $locale_cache;
|
|
|
|
sub parse_http_accept_language {
|
|
my ($string) = @_;
|
|
|
|
my @tokens = split / *, */, $string;
|
|
|
|
my %retval;
|
|
foreach my $token (@tokens) {
|
|
my $quality = 1;
|
|
# This RE is more forgiving than the standard. It accepts
|
|
# values greater than 1.0 and with more fractional digits
|
|
# than 3.
|
|
if ($token =~ s/ *; *q *= *([0-9]+(?:\.([0-9]+))?)$//) {
|
|
$quality = $1;
|
|
}
|
|
$retval{$token} = $quality;
|
|
}
|
|
|
|
# RFC 2616 only allows 1-8 characters for language and country
|
|
# but we are more forgiving.
|
|
return grep {
|
|
/^[A-Za-z]+(?:-[A-Za-z]+)?$/
|
|
} map {
|
|
$_ = 'C' if $_ eq '*'; $_
|
|
} sort {
|
|
$retval{$b} <=> $retval{$a}
|
|
} keys %retval;
|
|
}
|
|
|
|
sub parse_http_accept_charset {
|
|
my ($string) = @_;
|
|
|
|
my @tokens = split / *, */, $string;
|
|
|
|
my %retval;
|
|
foreach my $token (@tokens) {
|
|
my $quality = 1;
|
|
# This RE is more forgiving than the standard. It accepts
|
|
# values greater than 1.0 and with more fractional digits
|
|
# than 3.
|
|
if ($token =~ s/ *; *q *= *([0-9]+(?:\.([0-9]+))?)$//) {
|
|
$quality = $1;
|
|
}
|
|
$retval{$token} = $quality;
|
|
}
|
|
|
|
return grep {
|
|
# This is really allowed in character set names ...
|
|
/^[-!\#\$\%\&\'\+\.0-9A-Z_\`a-z\|\~]+$/
|
|
} map {
|
|
$_ = undef if $_ eq '*'; $_
|
|
} sort {
|
|
$retval{$b} <=> $retval{$a}
|
|
} keys %retval;
|
|
}
|
|
|
|
sub set_locale {
|
|
my ($category, $language, $country, $charset) = @_;
|
|
|
|
require POSIX;
|
|
|
|
$country = '' unless defined $country;
|
|
$charset = '' unless defined $charset;
|
|
|
|
my $set_locale;
|
|
# Look up the cache first.
|
|
if (my $retval = $locale_cache->{$language}->{$country}->{$charset}) {
|
|
my ($locale, $country) = @$retval;
|
|
POSIX::setlocale ($category, $locale);
|
|
return @$retval;
|
|
}
|
|
|
|
# Initialize the cache with the undefined value so that we can do
|
|
# error returns without setting it.
|
|
$locale_cache->{$language}->{$country}->{$charset} = undef;
|
|
|
|
my $windows = ($^O !~ /darwin/i && $^O =~ /win/i) ? 1 : 0;
|
|
if ($windows) {
|
|
return &__set_locale_windows;
|
|
}
|
|
|
|
my $set_language;
|
|
my $set_country;
|
|
|
|
# First we try to only use the language.
|
|
my @languages = ($language);
|
|
my @lc_languages = map { lc $_ } @languages;
|
|
my @uc_languages = map { uc $_ } @languages;
|
|
my %seen = ();
|
|
|
|
foreach my $language (@languages, @lc_languages, @uc_languages) {
|
|
next if $seen{$language}++;
|
|
warn "Trying lingua only setlocale '$language'.\n" if DEBUG;
|
|
my $result = POSIX::setlocale ($category, $language);
|
|
if ($result) {
|
|
$set_locale = $set_language = $result if $result;
|
|
last;
|
|
}
|
|
}
|
|
|
|
# Now try it with the country appended.
|
|
my @countries = length $country ? ($country) : ();
|
|
my @uc_countries = map { uc $_ } @countries;
|
|
my @lc_countries = map { uc $_ } @countries;
|
|
push @countries, @uc_countries, @lc_countries;
|
|
|
|
LINGUA: foreach my $language (@languages, @lc_languages, @uc_languages) {
|
|
my $count = 0;
|
|
my @guessed_countries = (LANG2COUNTRY->{lc $language},
|
|
lc LANG2COUNTRY->{lc $language},
|
|
uc LANG2COUNTRY->{lc $language});
|
|
foreach my $c (@countries, @guessed_countries) {
|
|
++$count;
|
|
next unless defined $c && length $c;
|
|
my $try = $language . '_' . $c;
|
|
next if $seen{$try}++;
|
|
warn "Trying setlocale '$try'.\n" if DEBUG;
|
|
my $result = POSIX::setlocale ($category, $try);
|
|
if ($result) {
|
|
$set_locale = $result;
|
|
if ($count >= @countries) {
|
|
$set_country = $c;
|
|
} else {
|
|
$set_country = $country;
|
|
}
|
|
|
|
last LINGUA;
|
|
}
|
|
}
|
|
}
|
|
|
|
unless (length $charset) {
|
|
return unless defined $set_locale && length $set_locale;
|
|
|
|
$locale_cache->{$language}->{$country}->{$charset} =
|
|
[$set_locale, $set_country];
|
|
return wantarray ? ($set_locale, $set_country) : $set_locale;
|
|
}
|
|
|
|
my @charsets = ($charset);
|
|
my $cleaned = $charset;
|
|
push @charsets, $cleaned if $cleaned =~ s/-//g;
|
|
my @lc_charsets = map { lc $charset } @charsets;
|
|
my @uc_charsets = map { uc $charset } @charsets;
|
|
push @charsets, @lc_charsets, @uc_charsets;
|
|
|
|
%seen = ();
|
|
LINGUA2: foreach my $language (@languages,
|
|
@lc_languages, @uc_languages) {
|
|
my @guessed_countries = (LANG2COUNTRY->{lc $language},
|
|
lc LANG2COUNTRY->{lc $language},
|
|
uc LANG2COUNTRY->{lc $language});
|
|
my $count = 0;
|
|
foreach my $c (@countries, @guessed_countries) {
|
|
++$count;
|
|
$c = '' unless defined $c && length $c;
|
|
my $country_try = $language;
|
|
$country_try .= (length $c) ? "_$c" : '';
|
|
|
|
foreach my $ch (@charsets, @lc_charsets, @uc_charsets) {
|
|
my $try = $country_try . '.' . $ch;
|
|
next if $seen{$try}++;
|
|
warn "Trying setlocale '$try'.\n" if DEBUG;
|
|
|
|
my $result = POSIX::setlocale ($category, $try);
|
|
if ($result) {
|
|
$set_locale = $result;
|
|
if ($count >= @countries) {
|
|
$set_country = $c;
|
|
} else {
|
|
$set_country = $country;
|
|
}
|
|
|
|
last LINGUA2;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return unless defined $set_locale && length $set_locale;
|
|
|
|
$locale_cache->{$language}->{$country}->{$charset} =
|
|
[$set_locale, $set_country];
|
|
|
|
return wantarray ? ($set_locale, $set_country) : $set_locale;
|
|
}
|
|
|
|
sub __set_locale_windows {
|
|
my ($category, $language, $country, $charset) = @_;
|
|
|
|
my $set_locale;
|
|
|
|
$country = '' unless defined $country;
|
|
$charset = '' unless defined $charset;
|
|
|
|
# First we try to only use the language.
|
|
my $long_language = WIN32LANGUAGE->{lc $language};
|
|
my @languages = ($long_language, $language);
|
|
my %seen = ();
|
|
foreach my $language (@languages) {
|
|
next if $seen{$language}++;
|
|
warn "Trying lingua only setlocale '$language'.\n" if DEBUG;
|
|
my $result = POSIX::setlocale ($category, $language);
|
|
if ($result) {
|
|
$set_locale = $result if $result;
|
|
last;
|
|
}
|
|
}
|
|
|
|
# Now try it with the country appended.
|
|
my $set_country;
|
|
if (length $country) {
|
|
COMBI: foreach my $language (@languages) {
|
|
# We do not need a fallback country here, because the "system" already
|
|
# provides the information.
|
|
my @short_countries = ($country);
|
|
my @countries = map {
|
|
WIN32COUNTRY->{lc $_}
|
|
} grep { length $_ } @short_countries;
|
|
foreach my $c (@countries) {
|
|
next unless defined $c && length $c;
|
|
my $try = $language . '_' . $c;
|
|
next if $seen{$try}++;
|
|
warn "Trying setlocale '$try'.\n" if DEBUG;
|
|
my $result = POSIX::setlocale ($category, $try);
|
|
if ($result) {
|
|
$set_locale = $result;
|
|
$set_country = $c;
|
|
last COMBI;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return unless defined $set_locale && length $set_locale;
|
|
|
|
# Apparently, there is no point in setting a charset. Even the new
|
|
# MS-DOS versions like 2000 or XP still have the concept of more or
|
|
# less fixed codepages. Switching to UTF-8 does not work.
|
|
$locale_cache->{$language}->{$country}->{$charset} =
|
|
[$set_locale, $set_country];
|
|
return wantarray ? ($set_locale, $set_country) : $set_locale;
|
|
}
|
|
|
|
sub get_locale_cache {
|
|
$locale_cache;
|
|
}
|
|
|
|
sub set_locale_cache {
|
|
if (ref $_[0] && 'HASH' eq ref $_[0]) {
|
|
$locale_cache = $_[0];
|
|
} else {
|
|
my %locale_cache = @_;
|
|
$locale_cache = \%locale_cache;
|
|
}
|
|
}
|
|
|
|
sub web_set_locale {
|
|
my ($accept_language, $accept_charset, $category, $available) = @_;
|
|
|
|
my %available;
|
|
if ($available) {
|
|
foreach (@$available) {
|
|
my $locale = $_;
|
|
$locale =~ s/[_\@\.].*//;
|
|
$available{lc $locale} = 1;
|
|
}
|
|
}
|
|
|
|
my @languages;
|
|
if (ref $accept_language && 'ARRAY' eq ref $accept_language) {
|
|
@languages = @$accept_language;
|
|
} else {
|
|
@languages = parse_http_accept_language $accept_language;
|
|
}
|
|
|
|
if ($available) {
|
|
my @all = @languages;
|
|
@languages = ();
|
|
foreach my $locale (@all) {
|
|
my $language = lc $locale;
|
|
$language =~ s/[_\@\.].*//;
|
|
push @languages, $locale if $available{$language};
|
|
}
|
|
}
|
|
|
|
my @charsets;
|
|
if (defined $accept_charset) {
|
|
if (ref $accept_charset && 'ARRAY' eq ref $accept_charset) {
|
|
@charsets = @$accept_charset;
|
|
} else {
|
|
@charsets = parse_http_accept_charset $accept_charset;
|
|
}
|
|
}
|
|
|
|
unless (defined $category) {
|
|
require POSIX;
|
|
$category = POSIX::LC_ALL();
|
|
}
|
|
|
|
my ($set_locale, $set_language, $set_country, $set_charset);
|
|
foreach my $lang (@languages) {
|
|
my ($language, $country) = split /-/, $lang, 2;
|
|
|
|
my ($locale, $country_used) =
|
|
set_locale ($category, $language, $country, $charsets[0]);
|
|
|
|
if (defined $locale) {
|
|
# If a country was specified, we have to check whether it
|
|
# was actually selected.
|
|
if (defined $country) {
|
|
if (!defined $country
|
|
|| ($country ne $country_used)) {
|
|
$set_language = $language;
|
|
$set_locale = $locale;
|
|
$set_country = $country_used;
|
|
$set_charset = $charsets[0];
|
|
}
|
|
}
|
|
|
|
if (wantarray) {
|
|
return $locale, $lang, $country_used, $charsets[0];
|
|
} else {
|
|
return $locale;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (defined $set_locale) {
|
|
if (wantarray) {
|
|
return $set_locale, $set_language, $set_country, $set_charset;
|
|
} else {
|
|
return $set_locale;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
Locale::Util - Portable l10n and i10n functions
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Locale::Util;
|
|
|
|
my @linguas = parse_http_accept_language $ENV{HTTP_ACCEPT_LANGUAGE};
|
|
|
|
my @charsets = parse_http_accept_charset $ENV{HTTP_ACCEPT_CHARSET};
|
|
|
|
# Trie to set the locale to Brasilian Portuguese in UTF-8.
|
|
my $set_locale = set_locale LC_ALL, 'pt', 'BR', 'utf-8';
|
|
|
|
set_locale_cache $last_cache;
|
|
|
|
my $cache = get_locale_cache;
|
|
|
|
web_set_locale ($ENV{HTTP_ACCEPT_LANGUAGE}, $ENV_ACCEPT_CHARSET);
|
|
|
|
web_set_locale (['fr-BE', 'fr', 'it'], ['cp1252', 'utf-8']);
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This module provides portable functions dealing with localization
|
|
(l10n) and internationalization(i10n). It doesn't export anything
|
|
by default, you have to specify each function you need in the import
|
|
list, or use the fully qualified name.
|
|
|
|
The functions here have a focus on web development, although they
|
|
are general enough to have them in the Locale:: namespace.
|
|
|
|
This module is considered alpha code. The interface is not stable.
|
|
Please contact the author if you want to use it in production code.
|
|
|
|
This module was introduced in libintl-perl 1.17.
|
|
|
|
=head1 FUNCTIONS
|
|
|
|
=over 4
|
|
|
|
=item B<parse_http_accept_language STRING>
|
|
|
|
Parses a string as passed in the HTTP header "Accept-Language".
|
|
It returns a list of tokens sorted by the quality value, see RFC 2616
|
|
for details.
|
|
|
|
Example:
|
|
|
|
parse_http_accept ("fr-fr, fr; q=0.7, de; q=0.3");
|
|
|
|
This means: Give me French for France with a quality value of 1.0
|
|
(the maximum). Otherwise I will take any other French version
|
|
(quality 0.7), German has a quality of 0.3 for me.
|
|
|
|
The function will return a list of tokens in the order of their quality
|
|
values, in this case "fr-fr", "fr" and "de".
|
|
|
|
The function is more forgiving than RFC 2616. It accepts quality
|
|
values greater than 1.0 and with more than 3 decimal places. It
|
|
also accepts languages and country names with more than 8 characters.
|
|
The language "*" is translated into "C".
|
|
|
|
=item B<parse_http_accept_charset STRING>
|
|
|
|
Parses a string as passed in the HTTP header "Accept-Charset".
|
|
It returns a list of tokens sorted by the quality value, see RFC 2616
|
|
for details.
|
|
|
|
The special character set "*" (means all character sets) will be
|
|
translated to the undefined value.
|
|
|
|
=item B<set_locale CATEGORY, LANGUAGE[, COUNTRY, CHARSET]>
|
|
|
|
Tries to set the user locale by means of POSIX::setlocale(). The latter
|
|
function has the disadvantage, that its second argument (the locale
|
|
description string) is completely non-standard and system-dependent.
|
|
This function tries its best at guessing the system's notion of a locale
|
|
dientifier, with the arguments supplied:
|
|
|
|
=over 8
|
|
|
|
=item B<CATEGORY>
|
|
|
|
An integer argument for a valid locale category. These are the
|
|
LC_* constants (LC_ALL, LC_CTIME, LC_COLLATE, ...) defined in both
|
|
Locale::Messages(3pm) and POSIX(3pm).
|
|
|
|
=item B<LANGUAGE>
|
|
|
|
A 2-letter language identifier as per ISO 639. Case doesn't matter,
|
|
but an unchanged version (ie. not lower-cased) of the language you
|
|
provided will always be tried to.
|
|
|
|
=item B<COUNTRY>
|
|
|
|
A 2-letter language identifier as per ISO 639. Case doesn't matter,
|
|
but an unchanged version (ie. not lower-cased) of the language you
|
|
provided will always be tried to.
|
|
|
|
This parameter is optional. If it is not defined, the function will
|
|
try to guess an appropriate country, otherwise leave it to the
|
|
operating system.
|
|
|
|
=item B<CHARSET>
|
|
|
|
A valid charset name. Valid means valid! The charset "utf8" is not
|
|
valid (it is "utf-8"). Charset names that are accepted by the
|
|
guessing algorithms in Encode(3pm) are also not necessarily valid.
|
|
|
|
If the parameter is undefined, it is ignored. It is always ignored
|
|
under Windows.
|
|
|
|
=back
|
|
|
|
The function tries to approach the desired locale in loops, refining
|
|
it on every success. It will first try to set the language (for
|
|
any country), then try to select the correct language, and finally
|
|
try to select the correct charset.
|
|
|
|
The return value is false in case of failure, or the return value
|
|
of the underlying POSIX::setlocale() call in case of success.
|
|
|
|
In array context, the function returns the country name
|
|
that was passed in the successful
|
|
call to POSIX::setlocale(). If this string is equal to the country
|
|
name you passed as an argument, you can be reasonably sure that
|
|
the settings for this country are really used. If it is not
|
|
equal, the function has taken a guess at the country (it has a list
|
|
of "default" countries for each language). It seems that under
|
|
Windows, POSIX::setlocale() also succeeds, if you pass a country
|
|
name that is actually not supported. Therefore, the information
|
|
is not completely reliable.
|
|
|
|
Please note that this function is intended for server processes
|
|
(especially web applications) that need to switch in a portable
|
|
way to a certain locale. It is B<not> the recommended way to set
|
|
the program locale for a regular application. In a regular application
|
|
you should do the following:
|
|
|
|
use POSIX qw (setlocale LC_ALL);
|
|
setlocale LC_ALL, '';
|
|
|
|
The empty string as the second argument means, that the system
|
|
should switch to the user's default locale.
|
|
|
|
=item B<get_locale_cache>
|
|
|
|
The function set_locale() is potentially expansive, especially when
|
|
it fails, because it can try a lot of different combinations, and
|
|
the system may have to load a lot of locale definitions from its
|
|
internal database.
|
|
|
|
In order to speed up things, results are internally cached in a
|
|
hash, keys are the languages, subkeys countries, subsubkeys the
|
|
charsets. You can get a reference to this hash with get_locale_cache().
|
|
|
|
The function cannot fail.
|
|
|
|
=item B<set_locale_cache HASH>
|
|
|
|
Sets the internal cache. You can either pass a hash or a hash reference.
|
|
The function will use this as its cache, discarding its old cache.
|
|
This allows you to keep the hash persistent.
|
|
|
|
The function cannot fail.
|
|
|
|
=item B<web_set_locale (ACCEPT_LANGUAGE, ACCEPT_CHARSET, CATEGORY,
|
|
AVAILABLE)>
|
|
|
|
Try to change the locale to the settings described by ACCEPT_LANGUAGE
|
|
and ACCEPT_CHARSET. For each argument you can either pass a string
|
|
as in the corresponding http header, or a reference to an array
|
|
of language resp. charset identifiers.
|
|
|
|
Currently only the first charset passed is used as an argument.
|
|
You are strongly encouraged to pass a hard-coded value here, so
|
|
that you have control about your output.
|
|
|
|
The argument B<CATEGORY> specifies the category (one of the LC_*
|
|
constants as defined in Locale::Messages(3pm) or in POSIX(3pm)).
|
|
The category defaults to LC_ALL.
|
|
|
|
You can pass an optional reference to a list of locales in
|
|
XPG4 format that are available in your application. This is
|
|
useful if you know which languages are supported by your application.
|
|
In fact, only the language part of the values in the list are
|
|
considered (for example for "en_US", only "en" is used). The
|
|
country or other parts are ignored.
|
|
|
|
The function returns the return value of the underlying set_locale()
|
|
call, or false on failure.
|
|
|
|
The function returns false on failure. On success it returns the
|
|
return value of the underlying set_locale() call. This value can
|
|
be used directly in subsequent calls to POSIX::setlocale(). In
|
|
array context, it additionally returns the identifiers for the language,
|
|
the country, and the charset actually used.
|
|
|
|
=back
|
|
|
|
=head1 BUGS
|
|
|
|
The function set_locale() probably fails to guess the correct locale
|
|
identifier on a lot of systems. If you have found such a case,
|
|
please submit it as a bug report.
|
|
|
|
The bug tracking system for this packags is at
|
|
http://rt.cpan.org/NoAuth/Bugs.html?libintl-perl
|
|
|
|
Please note that this module is considered alpha code, and the interface
|
|
is not stable. Please contact the author, if you want to use it in
|
|
production code.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Copyright (C) 2002-2017 L<Guido Flohr|http://www.guido-flohr.net/>
|
|
(L<mailto:guido.flohr@cantanea.com>), all rights reserved. See the source
|
|
code for details!code for details!
|
|
|
|
=head1 SEE ALSO
|
|
|
|
POSIX(3pm), perl(1)
|
|
|
|
=cut
|
|
Local Variables:
|
|
mode: perl
|
|
perl-indent-level: 4
|
|
perl-continued-statement-offset: 4
|
|
perl-continued-brace-offset: 0
|
|
perl-brace-offset: -4
|
|
perl-brace-imaginary-offset: 0
|
|
perl-label-offset: -4
|
|
cperl-indent-level: 4
|
|
cperl-continued-statement-offset: 2
|
|
tab-width: 4
|
|
End:
|