default services conflict?

This commit is contained in:
armansansd
2022-04-27 11:30:43 +02:00
parent 28190a5749
commit 8bb1064a3b
8132 changed files with 900138 additions and 426 deletions

View File

@@ -0,0 +1,61 @@
<?php
// Downloads the raw datasets the generator scripts work from:
// - CLDR JSON (country data) is cloned into assets/cldr.
// - Google's address data is downloaded into assets/google via aria2.
$serviceUrl = 'https://chromium-i18n.appspot.com/ssl-address';
$assetsDirectory = __DIR__ . '/assets';

// Make sure we're starting from a clean slate.
if (is_dir($assetsDirectory)) {
    die('The assets/ directory already exists.');
}

// Make sure aria2 is installed.
exec('aria2c --version', $ariaVersion);
if (empty($ariaVersion) || strpos($ariaVersion[0], 'aria2 version') === false) {
    die('aria2 must be installed.');
}

// Prepare the filesystem. Stop early if the directories can't be created,
// since every following step writes into them.
if (!mkdir($assetsDirectory) || !mkdir($assetsDirectory . '/google')) {
    die('The assets/ directory could not be created.');
}

// Fetch country data (CLDR).
// The target path is escaped so that a checkout located in a directory with
// spaces or shell metacharacters still produces a valid command.
echo "Fetching country data.\n";
exec('git clone --depth 1 https://github.com/unicode-org/cldr-json.git ' . escapeshellarg($assetsDirectory . '/cldr'));

// Fetch address data (Google).
echo "Generating the url list.\n";
$urlList = generate_url_list();
file_put_contents($assetsDirectory . '/url_list.txt', $urlList);

// Invoke aria2 and fetch the data.
echo "Downloading the raw data from Google's endpoint.\n";
exec(
    'aria2c -u 16 -i ' . escapeshellarg($assetsDirectory . '/url_list.txt')
    . ' -d ' . escapeshellarg($assetsDirectory . '/google')
);
echo "Download complete.\n";
/**
 * Generates a list of all urls that need to be downloaded using aria2.
 *
 * Reads the endpoint stored in the global $serviceUrl, extracts every
 * /ssl-address/data/ link from the returned index page and turns each one
 * into an aria2 input file entry (the url, followed by an "out=" option row).
 *
 * @return string The aria2 input file contents, one entry per found link.
 *
 * @throws \RuntimeException If the index page could not be fetched.
 */
function generate_url_list()
{
    global $serviceUrl;

    $index = file_get_contents($serviceUrl);
    if ($index === false) {
        // Without the index there is nothing to download; failing loudly is
        // better than silently writing an empty url list.
        throw new \RuntimeException('Could not fetch the index from ' . $serviceUrl . '.');
    }
    // Get all links that start with /ssl-address/data.
    // This avoids the /address/examples urls which aren't needed.
    preg_match_all("/<a\shref=\'\/ssl-address\/data\/([^\"]*)\'>/siU", $index, $matches);
    // Assemble the urls
    $list = array_map(function ($href) use ($serviceUrl) {
        // Replace the HTML-encoded single quote with a real one.
        $href = str_replace('&#39;', "'", $href);
        // Convert 'US/CA' into 'US_CA.json'.
        $filename = str_replace('/', '_', $href) . '.json';
        $url = $serviceUrl . '/data/' . $href;
        // aria2 expects the out= parameter to be in the next row, indented.
        $url .= "\n out=$filename";

        return $url;
    }, $matches[1]);

    return implode("\n", $list);
}

View File

@@ -0,0 +1,535 @@
<?php
/**
* Generates address formats, and the JSON files stored in resources/subdivision.
*/
// Lift the execution time limit; the script processes one file per
// country and per subdivision group.
set_time_limit(0);
date_default_timezone_set('UTC');
include __DIR__ . '/../vendor/autoload.php';
// NOTE(review): presumably provides get_subdivision_customizations() and
// get_address_format_customizations(), which are called below but not
// defined in this script - confirm.
include __DIR__ . '/../resources/library_customizations.php';
use CommerceGuys\Addressing\AddressFormat\AddressField;
use CommerceGuys\Addressing\AddressFormat\AdministrativeAreaType;
use CommerceGuys\Addressing\AddressFormat\DependentLocalityType;
use CommerceGuys\Addressing\AddressFormat\PostalCodeType;
use CommerceGuys\Addressing\Country\CountryRepository;
use CommerceGuys\Addressing\Locale;
$countryRepository = new CountryRepository();
$countries = $countryRepository->getList();
ksort($countries);
$serviceUrl = 'https://chromium-i18n.appspot.com/ssl-address';
// Make sure we're starting from a clean slate.
if (is_dir(__DIR__ . '/subdivision')) {
die('The subdivision/ directory must not exist.');
}
// Prepare the filesystem.
mkdir(__DIR__ . '/subdivision');
// Create a list of countries for which Google has definitions.
// ZZ is always included; it is not looked up in the index.
$foundCountries = ['ZZ'];
$index = file_get_contents($serviceUrl);
foreach ($countries as $countryCode => $countryName) {
$link = "<a href='/ssl-address/data/{$countryCode}'>";
// This is still faster than running a file_exists() for each country code.
if (strpos($index, $link) !== false) {
$foundCountries[] = $countryCode;
}
}
echo "Converting the raw definitions into the expected format.\n";
$genericDefinition = null;
$addressFormats = [];
$groupedSubdivisions = [];
foreach ($foundCountries as $countryCode) {
// The raw definitions are read from assets/google, one JSON file per country.
$definition = file_get_contents(__DIR__ . '/assets/google/' . $countryCode . '.json');
$definition = json_decode($definition, true);
// A definition that has nothing besides id/key/name carries no format data.
$extraKeys = array_diff(array_keys($definition), ['id', 'key', 'name']);
if (empty($extraKeys)) {
// This is an empty definition, skip it.
continue;
}
if ($countryCode == 'MO') {
// Fix for Macao, which has latin and non-latin formats, but no lang.
$definition['lang'] = 'zh';
}
$addressFormat = create_address_format_definition($countryCode, $definition);
// Get the French subdivision names for Canada.
// This mechanism can only work for countries with a single
// alternative language and ISO-based subdivision codes
// (URL example: data/CA/AB and data/CA/AB--fr).
$languages = [];
if ($countryCode == 'CA' && isset($definition['languages'])) {
$languages = explode('~', $definition['languages']);
// Drop the first listed language, leaving only the alternative one(s).
array_shift($languages);
}
// sub_keys is a "~"-separated list; each key maps to a
// {countryCode}_{key}.json file in assets/google.
$subdivisionPaths = [];
if (isset($definition['sub_keys'])) {
$subdivisionKeys = explode('~', $definition['sub_keys']);
foreach ($subdivisionKeys as $subdivisionKey) {
$subdivisionPaths[] = $countryCode . '_' . $subdivisionKey;
}
}
// Array union: groups that are already present are kept as they are.
$groupedSubdivisions += generate_subdivisions($countryCode, [$countryCode], $subdivisionPaths, $languages);
$addressFormats[$countryCode] = $addressFormat;
}
echo "Writing the final definitions to disk.\n";
// Subdivisions are stored in JSON.
foreach ($groupedSubdivisions as $parentId => $subdivisions) {
file_put_json(__DIR__ . '/subdivision/' . $parentId . '.json', $subdivisions);
}
// Replace subdivision/ES.json with the old resources/subdivision/ES.json, to
// get around a dataset regression (https://github.com/googlei18n/libaddressinput/issues/160).
copy(__DIR__ . '/../resources/subdivision/ES.json', __DIR__ . '/subdivision/ES.json');
// Generate the subdivision depths for each country.
// This has to run after the subdivision files are written, because the
// depths are derived from the files found in subdivision/.
$depths = generate_subdivision_depths($foundCountries);
foreach ($depths as $countryCode => $depth) {
$addressFormats[$countryCode]['subdivision_depth'] = $depth;
}
// Address formats are stored in PHP, then manually transferred to
// AddressFormatRepository.
file_put_php(__DIR__ . '/address_formats.php', $addressFormats);
echo "Done.\n";
/**
 * Converts the provided data into json and writes it to the disk.
 *
 * The pretty-printed output is re-indented with tabs. Only the leading
 * indentation of each line is converted, so spaces inside keys and values
 * (e.g. "New York") are written out untouched and the file stays valid JSON.
 *
 * @param string $filename The path of the file to write.
 * @param mixed  $data     The data to encode.
 *
 * @throws \RuntimeException If the data cannot be encoded as JSON.
 */
function file_put_json($filename, $data)
{
    $json = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        throw new \RuntimeException('Could not encode the data for ' . $filename . ': ' . json_last_error_msg());
    }
    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
    // JSON_PRETTY_PRINT uses 4 spaces per level, so each leading group of
    // 4 spaces becomes one tab.
    $json = preg_replace_callback('/^(?: {4})+/m', function ($match) {
        return str_repeat("\t", (int) (strlen($match[0]) / 4));
    }, $json);
    file_put_contents($filename, $json);
}
/**
* Converts the provided data into php and writes it to the disk.
*
* The data is exported with var_export() and then massaged through a series
* of string replacements: short array syntax, no numeric list keys, address
* fields on a single row, and double-quoted format strings. The result is
* written as a "$data = [...];" assignment preceded by the PHP open tag.
*
* Format strings are expected to end in the ";;;" marker, which is used to
* find (and replace) their closing quote.
*/
function file_put_php($filename, $data)
{
$data = var_export($data, true) . ';';
// The var_export output is terrible, so try to get it as close as possible
// to the final result.
// Numeric keys that are stripped from the export (list indexes 0 to 11).
$array_keys = [
'0 => ', '1 => ', '2 => ', '3 => ', '4 => ', '5 => ',
'6 => ', '7 => ', '8 => ', '9 => ', '10 => ', '11 => ',
];
// Switch to the short array syntax and pull array openers onto the key's row.
$data = str_replace(['array (', "),\n", ');', "=> \n "], ['[', "],\n", '];', '=> '], $data);
// NOTE(review): the search and replacement strings are identical here, which
// makes this call a no-op as written - confirm that whitespace inside the
// search string wasn't lost.
$data = str_replace('=> [', '=> [', $data);
$data = str_replace($array_keys, '', $data);
// Put fields into one row.
$find = [];
$replace = [];
foreach (AddressField::getAll() as $field) {
$find[] = "'$field',\n '";
$replace[] = "'$field', '";
}
$data = str_replace($find, $replace, $data);
// Replace format single quotes with double quotes, to parse \n properly.
// The ";;;" marker together with the closing single quote becomes the
// closing double quote.
$data = str_replace(["format' => '", ";;;'"], ['format\' => "', '"'], $data);
// Reindent (from 2 to 4 spaces).
// NOTE(review): search and replacement are both a single space here, so no
// reindenting happens as written - confirm the intended literals.
$data = str_replace(' ', ' ', $data);
// Unescape backslashes.
$data = str_replace('\\\\', '\\', $data);
$data = '<?php' . "\n\n" . '$data = ' . $data;
file_put_contents($filename, $data);
}
/**
* Recursively generates subdivision definitions.
*
* Reads one raw JSON file from assets/google per subdivision path, converts
* it, and descends into the children listed under "sub_keys".
*
* $countryCode is the country the subdivisions belong to. $parents is the
* parent chain, starting with the country code; it determines the group
* (see build_group()). $subdivisionPaths holds the file names, without
* extension, of the raw definitions at this level. $languages lists the
* translation languages; only the first one is used.
*
* Returns the definitions keyed by group, including the groups of all
* descendants, or an empty array if this level ended up with no subdivisions.
*/
function generate_subdivisions($countryCode, array $parents, $subdivisionPaths, $languages)
{
$group = build_group($parents);
$subdivisions = [];
$subdivisions[$group] = [
'country_code' => $countryCode,
];
if (count($parents) > 1) {
// A single parent is the same as the country code, hence unnecessary.
$subdivisions[$group]['parents'] = $parents;
}
foreach ($subdivisionPaths as $subdivisionPath) {
$definition = file_get_contents(__DIR__ . '/assets/google/' . $subdivisionPath . '.json');
$definition = json_decode($definition, true);
// The lname is usable as a latin code when the key is non-latin.
$code = $definition['key'];
if (isset($definition['lname'])) {
$code = $definition['lname'];
}
// The locale is set once per group, by the first definition providing it.
if (empty($subdivisions[$group]['locale']) && isset($definition['lang'], $definition['lname'])) {
// Only add the locale if there's a local name.
$subdivisions[$group]['locale'] = process_locale($definition['lang']);
}
// (Ab)use the local_name field to hold latin translations. This allows
// us to support only a single translation, but since our only example
// here is Canada (with French), it will do.
// reset() returns false for an empty list, which skips the translation.
$translationLanguage = reset($languages);
if ($translationLanguage) {
// Translations live next to the definition, in {path}--{language}.json.
$translation = file_get_contents(__DIR__ . '/assets/google/' . $subdivisionPath . '--' . $translationLanguage . '.json');
$translation = json_decode($translation, true);
// This overrides any locale set from the definition's "lang" above.
$subdivisions[$group]['locale'] = Locale::canonicalize($translationLanguage);
// The original name moves to lname, the translated name takes its place.
$definition['lname'] = $definition['name'];
$definition['name'] = $translation['name'];
}
// Remove the locale key if it wasn't filled.
if (empty($subdivisions[$group]['locale'])) {
unset($subdivisions[$group]['locale']);
}
// Generate the subdivision.
$subdivisions[$group]['subdivisions'][$code] = create_subdivision_definition($countryCode, $code, $definition);
if (isset($definition['sub_keys'])) {
$subdivisions[$group]['subdivisions'][$code]['has_children'] = true;
// Child paths extend the current path: {path}_{childKey}.
$subdivisionChildrenPaths = [];
$subdivisionChildrenKeys = explode('~', $definition['sub_keys']);
foreach ($subdivisionChildrenKeys as $subdivisionChildrenKey) {
$subdivisionChildrenPaths[] = $subdivisionPath . '_' . $subdivisionChildrenKey;
}
$childParents = array_merge($parents, [$code]);
// Array union: groups that are already present are not overwritten.
$subdivisions += generate_subdivisions($countryCode, $childParents, $subdivisionChildrenPaths, $languages);
}
}
// Apply any found customizations.
$customizations = get_subdivision_customizations($group);
$subdivisions[$group] = apply_subdivision_customizations($subdivisions[$group], $customizations);
// When this level has no subdivisions, everything is discarded, including
// any descendant groups collected above.
return !empty($subdivisions[$group]['subdivisions']) ? $subdivisions : [];
}
/**
 * Generates the subdivision depths for each country.
 *
 * The depth is derived from the files present in subdivision/: the country
 * file itself counts as the first level, files prefixed with a single dash
 * as the second, files prefixed with two dashes as the third. Counting stops
 * at the first level that has no file.
 *
 * @param array $countries The country codes to inspect.
 *
 * @return array The depths, keyed by country code. Countries without any
 *               subdivision file are left out.
 */
function generate_subdivision_depths($countries)
{
    // One glob suffix per level, from the shallowest to the deepest.
    $levelSuffixes = ['', '-*', '--*'];
    $depths = [];
    foreach ($countries as $countryCode) {
        $prefix = __DIR__ . '/subdivision/' . $countryCode;
        $levels = 0;
        foreach ($levelSuffixes as $suffix) {
            if (!glob($prefix . $suffix . '.json')) {
                break;
            }
            $levels++;
        }
        if ($levels === 0) {
            continue;
        }
        // A country code listed more than once keeps adding to its depth.
        $known = isset($depths[$countryCode]) ? $depths[$countryCode] : 0;
        $depths[$countryCode] = $known + $levels;
    }

    return $depths;
}
/**
 * Creates an address format definition from Google's raw definition.
 *
 * @param string $countryCode   The country code. "ZZ" is the generic definition.
 * @param array  $rawDefinition The decoded raw definition.
 *
 * @return array The address format definition, without NULL values. Format
 *               strings end in the ";;;" marker expected by file_put_php().
 */
function create_address_format_definition($countryCode, $rawDefinition)
{
    // Avoid notices.
    $rawDefinition += [
        'lang' => null,
        'fmt' => null,
        'require' => null,
        'upper' => null,
        'state_name_type' => null,
        'locality_name_type' => null,
        'sublocality_name_type' => null,
        'zip_name_type' => null,
    ];
    // ZZ holds the defaults for all address formats, and these are missing.
    if ($countryCode == 'ZZ') {
        $rawDefinition['state_name_type'] = AdministrativeAreaType::getDefault();
        $rawDefinition['sublocality_name_type'] = DependentLocalityType::getDefault();
        $rawDefinition['zip_name_type'] = PostalCodeType::getDefault();
    }

    $addressFormat = [
        'locale' => process_locale($rawDefinition['lang']),
        'format' => null,
        'local_format' => null,
        'required_fields' => convert_fields($rawDefinition['require'], 'required'),
        'uppercase_fields' => convert_fields($rawDefinition['upper'], 'uppercase'),
    ];
    if (isset($rawDefinition['lfmt']) && $rawDefinition['lfmt'] != $rawDefinition['fmt']) {
        // The latin format becomes the main one, the original the local one.
        $addressFormat['format'] = convert_format($countryCode, $rawDefinition['lfmt']);
        $addressFormat['local_format'] = convert_format($countryCode, $rawDefinition['fmt']);
    } else {
        $addressFormat['format'] = convert_format($countryCode, $rawDefinition['fmt']);
        // We don't need the locale if there's no local format.
        unset($addressFormat['locale']);
    }

    $addressFormat['administrative_area_type'] = $rawDefinition['state_name_type'];
    $addressFormat['locality_type'] = $rawDefinition['locality_name_type'];
    $addressFormat['dependent_locality_type'] = $rawDefinition['sublocality_name_type'];
    $addressFormat['postal_code_type'] = $rawDefinition['zip_name_type'];
    if (isset($rawDefinition['zip'])) {
        $addressFormat['postal_code_pattern'] = $rawDefinition['zip'];
    }
    if (isset($rawDefinition['postprefix'])) {
        // Workaround for https://github.com/googlei18n/libaddressinput/issues/72.
        if ($rawDefinition['postprefix'] == 'PR') {
            $rawDefinition['postprefix'] = 'PR ';
        }
        $addressFormat['postal_code_prefix'] = $rawDefinition['postprefix'];
        // Remove the prefix from the format strings.
        // Workaround for https://github.com/googlei18n/libaddressinput/issues/71.
        // A missing format is NULL at this point; the cast keeps the result
        // the same (an empty string) without passing NULL to str_replace(),
        // which is deprecated as of PHP 8.1.
        foreach (['format', 'local_format'] as $formatKey) {
            $addressFormat[$formatKey] = str_replace(
                $addressFormat['postal_code_prefix'],
                '',
                (string) $addressFormat[$formatKey]
            );
        }
    }
    // Add the subdivision_depth to the end of the ZZ definition.
    if ($countryCode == 'ZZ') {
        $addressFormat['subdivision_depth'] = 0;
    }
    // Remove multiple spaces in the formats.
    if (!empty($addressFormat['format'])) {
        $addressFormat['format'] = preg_replace('/[[:blank:]]+/', ' ', $addressFormat['format']);
    }
    if (!empty($addressFormat['local_format'])) {
        $addressFormat['local_format'] = preg_replace('/[[:blank:]]+/', ' ', $addressFormat['local_format']);
    }
    // Apply any customizations.
    $customizations = get_address_format_customizations($countryCode);
    foreach ($customizations as $key => $values) {
        $addressFormat[$key] = $values;
    }
    // Denote the end of the format string for file_put_php().
    if (!empty($addressFormat['format'])) {
        $addressFormat['format'] .= ';;;';
    }
    if (!empty($addressFormat['local_format'])) {
        $addressFormat['local_format'] .= ';;;';
    }
    // Remove NULL keys.
    $addressFormat = array_filter($addressFormat, function ($value) {
        return !is_null($value);
    });
    // Remove empty local formats.
    if (empty($addressFormat['local_format'])) {
        unset($addressFormat['local_format']);
    }

    return $addressFormat;
}
/**
 * Creates a subdivision definition from Google's raw definition.
 *
 * Names and codes are compared as strings. A loose comparison would treat
 * numeric strings such as "01" and "1" as equal and drop the name.
 *
 * @param string $countryCode   The country code, used to build the ISO code.
 * @param string $code          The code the subdivision is stored under.
 * @param array  $rawDefinition The decoded raw definition.
 *
 * @return array The subdivision definition. Values that would only repeat
 *               the code are left out.
 */
function create_subdivision_definition($countryCode, $code, $rawDefinition)
{
    $subdivision = [];
    // The name is only worth storing when it differs from the key.
    $nameDiffersFromKey = isset($rawDefinition['name'])
        && (string) $rawDefinition['key'] !== (string) $rawDefinition['name'];

    if (isset($rawDefinition['lname'])) {
        // There is a latin name, which makes the key and name the local ones.
        $subdivision['local_code'] = $rawDefinition['key'];
        if ($nameDiffersFromKey) {
            $subdivision['local_name'] = $rawDefinition['name'];
        }
        if ((string) $code !== (string) $rawDefinition['lname']) {
            $subdivision['name'] = $rawDefinition['lname'];
        }
    } elseif ($nameDiffersFromKey) {
        $subdivision['name'] = $rawDefinition['name'];
    }
    if (isset($rawDefinition['isoid'])) {
        $subdivision['iso_code'] = $countryCode . '-' . $rawDefinition['isoid'];
    }
    if (isset($rawDefinition['xzip'])) {
        $subdivision['postal_code_pattern'] = $rawDefinition['xzip'];
        $subdivision['postal_code_pattern_type'] = 'full';
    } elseif (isset($rawDefinition['zip'])) {
        $subdivision['postal_code_pattern'] = $rawDefinition['zip'];
        // There are more than 12 000 subdivisions, but only a few Chinese
        // ones specify a full pattern. Therefore, the postal_code_pattern_type
        // value is the same for most subdivisions, and omitted to save space.
    }

    return $subdivision;
}
/**
 * Applies subdivision customizations.
 *
 * Supported customization keys:
 * - _remove: A list of subdivision IDs to remove.
 * - _replace: A list of subdivision IDs to replace. The new definition is
 *   read from the customizations, under the same ID.
 * - _add: A list of subdivision IDs to append. The definition is read from
 *   the customizations, under the same ID.
 * - _add_after: A map of new subdivision ID => existing subdivision ID. The
 *   new subdivision is inserted right in front of the existing one. The
 *   definition is read from the customizations, under the new ID.
 *
 * @param array $subdivisions   The group definition, with the subdivisions
 *                              under the "subdivisions" key.
 * @param array $customizations The customizations, may be empty.
 *
 * @return array The customized group definition.
 */
function apply_subdivision_customizations($subdivisions, $customizations)
{
    if (empty($customizations)) {
        return $subdivisions;
    }

    $customizations += [
        '_remove' => [],
        '_replace' => [],
        '_add' => [],
        '_add_after' => [],
    ];
    foreach ($customizations['_remove'] as $removeId) {
        unset($subdivisions['subdivisions'][$removeId]);
    }
    foreach ($customizations['_replace'] as $replaceId) {
        $subdivisions['subdivisions'][$replaceId] = $customizations[$replaceId];
    }
    foreach ($customizations['_add'] as $addId) {
        $subdivisions['subdivisions'][$addId] = $customizations[$addId];
    }
    foreach ($customizations['_add_after'] as $addId => $nextId) {
        // PHP turns numeric string keys into integers, so the IDs are compared
        // as strings. This allows a strict search that still finds them.
        $existingIds = array_map('strval', array_keys($subdivisions['subdivisions']));
        $position = array_search((string) $nextId, $existingIds, true);
        if ($position === false) {
            // Unknown ID: insert at the beginning, as array_slice() would
            // have done with the boolean.
            $position = 0;
        }
        $new = [
            $addId => $customizations[$addId],
        ];
        // array_splice() doesn't support non-numeric replacement keys.
        // The keys are preserved, otherwise numeric subdivision codes would be
        // renumbered by array_slice() and then collide in the union below.
        $start = array_slice($subdivisions['subdivisions'], 0, $position, true);
        $end = array_slice($subdivisions['subdivisions'], $position, null, true);
        $subdivisions['subdivisions'] = $start + $new + $end;
    }

    return $subdivisions;
}
/**
 * Processes the locale string.
 *
 * The generic "zh" is mapped to "zh-hans" before the locale is passed
 * through Locale::canonicalize().
 */
function process_locale($locale) {
    // Be more precise when it comes to Chinese Simplified.
    $precise = ($locale == 'zh') ? 'zh-hans' : $locale;

    return Locale::canonicalize($precise);
}
/**
 * Converts the provided format string into one recognized by the library.
 *
 * @param string      $countryCode The country code, used to decide the order
 *                                 of the name tokens.
 * @param string|null $format      Google's format string.
 *
 * @return string|null The converted format string, or NULL if no format was
 *                     provided.
 */
function convert_format($countryCode, $format)
{
    if (empty($format)) {
        return null;
    }
    // Expand the recipient token into separate familyName/givenName tokens.
    // The additionalName field is not used by default.
    // Hardcode the list of countries that write the family name before the
    // given name, since the API doesn't give us that info.
    // "KO" is not an ISO 3166-1 code (South Korea is "KR"). It is kept for
    // backwards compatibility, with "KR" added so that Korea actually matches.
    $reverseCountries = [
        'KH', 'CN', 'HU', 'JP', 'KO', 'KR', 'MG', 'TW', 'VN',
    ];
    if (in_array($countryCode, $reverseCountries, true)) {
        $format = str_replace('%N', '%N3 %N1', $format);
    } else {
        $format = str_replace('%N', '%N1 %N3', $format);
    }
    // Expand the address token into separate tokens for address lines 1 and 2.
    $format = str_replace('%A', '%1%n%2', $format);

    $replacements = [
        '%S' => '%' . AddressField::ADMINISTRATIVE_AREA,
        '%C' => '%' . AddressField::LOCALITY,
        '%D' => '%' . AddressField::DEPENDENT_LOCALITY,
        '%Z' => '%' . AddressField::POSTAL_CODE,
        '%X' => '%' . AddressField::SORTING_CODE,
        '%1' => '%' . AddressField::ADDRESS_LINE1,
        '%2' => '%' . AddressField::ADDRESS_LINE2,
        '%O' => '%' . AddressField::ORGANIZATION,
        '%N3' => '%' . AddressField::FAMILY_NAME,
        '%N2' => '%' . AddressField::ADDITIONAL_NAME,
        '%N1' => '%' . AddressField::GIVEN_NAME,
        '%n' => '\n',
        // Remove hardcoded strings which duplicate the country name.
        '%nÅLAND' => '',
        'JERSEY%n' => '',
        'GUERNSEY%n' => '',
        'GIBRALTAR%n' => '',
        'SINGAPORE ' => '',
    ];
    // strtr() tries the longest keys first, which lets the hardcoded strings
    // that contain %n win over the plain %n replacement.
    $format = strtr($format, $replacements);

    return $format;
}
/**
 * Converts google's field symbols to the expected values.
 *
 * @param string|null $fields The field symbols, one character per field.
 * @param string      $type   "required" for required fields; any other value
 *                            gets the full expansion of names and addresses.
 *
 * @return array|null The converted fields, or NULL if $fields was NULL.
 */
function convert_fields($fields, $type)
{
    if (is_null($fields)) {
        return null;
    }
    if (empty($fields)) {
        return [];
    }

    $isRequired = ($type == 'required');
    // Expand the name token into separate tokens.
    // The additional name is never required.
    $fields = str_replace('N', $isRequired ? '79' : '789', $fields);
    // Expand the address token into separate tokens for address lines 1 and 2.
    // For required fields it's enough to require the first line.
    $fields = str_replace('A', $isRequired ? '1' : '12', $fields);

    $mapping = [
        'S' => AddressField::ADMINISTRATIVE_AREA,
        'C' => AddressField::LOCALITY,
        'D' => AddressField::DEPENDENT_LOCALITY,
        'Z' => AddressField::POSTAL_CODE,
        'X' => AddressField::SORTING_CODE,
        '1' => AddressField::ADDRESS_LINE1,
        '2' => AddressField::ADDRESS_LINE2,
        'O' => AddressField::ORGANIZATION,
        '7' => AddressField::FAMILY_NAME,
        '8' => AddressField::ADDITIONAL_NAME,
        '9' => AddressField::GIVEN_NAME,
    ];

    // Unknown symbols are passed through unchanged.
    return array_map(function ($symbol) use ($mapping) {
        return isset($mapping[$symbol]) ? $mapping[$symbol] : $symbol;
    }, str_split($fields));
}
/**
 * Copy of SubdivisionRepository::buildGroup().
 *
 * Builds the group name for a parent chain: the country code on its own for
 * top-level subdivisions, otherwise the country code followed by one dash
 * per additional parent and a hash of those parents.
 *
 * @param array $parents The parents, starting with the country code.
 *
 * @return string The group.
 *
 * @throws \InvalidArgumentException If no parents were provided.
 */
function build_group(array $parents)
{
    if (empty($parents)) {
        throw new \InvalidArgumentException('The $parents argument must not be empty.');
    }

    $countryCode = array_shift($parents);
    if (!$parents) {
        return $countryCode;
    }

    // A dash per key allows the depth to be guessed later.
    $dashes = str_repeat('-', count($parents));
    // Hash the remaining keys to ensure that the group is ASCII safe.
    // crc32b is the fastest but has collisions due to its short length.
    // sha1 and md5 are forbidden by many projects and organizations.
    // This is the next fastest option.
    $hashedParents = hash('tiger128,3', implode('-', $parents));

    return $countryCode . $dashes . $hashedParents;
}

View File

@@ -0,0 +1,306 @@
<?php
/**
* Generates the json files stored in resources/country.
*/
// Lift the execution time limit; every discovered locale is processed.
set_time_limit(0);
date_default_timezone_set('UTC');
include __DIR__ . '/../vendor/autoload.php';
// Locations of the CLDR data inside assets/cldr. $localeDirectory is also
// read (as a global) by generate_localizations() and discover_locales().
$localeDirectory = __DIR__ . '/assets/cldr/cldr-json/cldr-localenames-modern/main/';
$enCountries = $localeDirectory . 'en/territories.json';
$codeMappings = __DIR__ . '/assets/cldr/cldr-json/cldr-core/supplemental/codeMappings.json';
$currencyData = __DIR__ . '/assets/cldr/cldr-json/cldr-core/supplemental/currencyData.json';
// Stop right away if any of the required inputs is missing.
if (!file_exists($enCountries)) {
die("The $enCountries file was not found");
}
if (!file_exists($codeMappings)) {
die("The $codeMappings file was not found");
}
if (!file_exists($currencyData)) {
die("The $currencyData file was not found");
}
if (!function_exists('collator_create')) {
// Reimplementing intl's collator would be a huge undertaking, so we
// use it instead to presort the generated locale specific data.
die('The intl extension was not found.');
}
if (!is_dir($localeDirectory)) {
die("The $localeDirectory directory was not found");
}
// From here on $codeMappings and $currencyData hold the decoded data
// instead of the file paths.
$codeMappings = json_decode(file_get_contents($codeMappings), true);
$codeMappings = $codeMappings['supplemental']['codeMappings'];
$currencyData = json_decode(file_get_contents($currencyData), true);
$currencyData = $currencyData['supplemental']['currencyData'];
$englishData = json_decode(file_get_contents($enCountries), true);
$englishData = $englishData['main']['en']['localeDisplayNames']['territories'];
$baseData = generate_base_data($englishData, $codeMappings, $currencyData);
$localizations = generate_localizations($baseData, $englishData);
$localizations = filter_duplicate_localizations($localizations);
// Make sure we're starting from a clean slate.
if (is_dir(__DIR__ . '/country')) {
die('The country/ directory must not exist.');
}
// Prepare the filesystem.
mkdir(__DIR__ . '/country');
// Write out the localizations.
foreach ($localizations as $locale => $localizedCountries) {
// Sort the country names using the locale's own collation rules.
// uasort() keeps the country codes as keys.
$collator = collator_create($locale);
uasort($localizedCountries, function ($a, $b) use ($collator) {
return collator_compare($collator, $a, $b);
});
file_put_json(__DIR__ . '/country/' . $locale . '.json', $localizedCountries);
}
$availableLocales = array_keys($localizations);
sort($availableLocales);
// Base country definitions and available locales are stored
// in PHP, then manually transferred to CountryRepository.
$data = "<?php\n\n";
$data .= export_locales($availableLocales);
$data .= export_base_data($baseData);
file_put_contents(__DIR__ . '/country_data.php', $data);
echo "Done.\n";
/**
 * Converts the provided data into json and writes it to the disk.
 *
 * The pretty-printed output is re-indented with tabs. Only the leading
 * indentation of each line is converted, so spaces inside keys and values
 * (e.g. "United States") are written out untouched and the file stays valid
 * JSON.
 *
 * @param string $filename The path of the file to write.
 * @param mixed  $data     The data to encode.
 *
 * @throws \RuntimeException If the data cannot be encoded as JSON.
 */
function file_put_json($filename, $data)
{
    $json = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        throw new \RuntimeException('Could not encode the data for ' . $filename . ': ' . json_last_error_msg());
    }
    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
    // JSON_PRETTY_PRINT uses 4 spaces per level, so each leading group of
    // 4 spaces becomes one tab.
    $json = preg_replace_callback('/^(?: {4})+/m', function ($match) {
        return str_repeat("\t", (int) (strlen($match[0]) / 4));
    }, $json);
    file_put_contents($filename, $json);
}
/**
 * Exports base data.
 *
 * Produces the PHP source of a $baseData array with one row per country:
 * the three letter code, the numeric code and the currency code, in that
 * order. Missing values are exported as null.
 *
 * @param array $baseData The base data, keyed by country code.
 *
 * @return string The PHP source.
 */
function export_base_data($baseData)
{
    // Single-quotes the value stored under the given key, if there is one.
    $exportValue = function ($countryData, $key) {
        return isset($countryData[$key]) ? "'" . $countryData[$key] . "'" : 'null';
    };

    $rows = '';
    foreach ($baseData as $countryCode => $countryData) {
        $columns = [
            $exportValue($countryData, 'three_letter_code'),
            $exportValue($countryData, 'numeric_code'),
            $exportValue($countryData, 'currency_code'),
        ];
        $rows .= " '" . $countryCode . "' => [" . implode(', ', $columns) . "],\n";
    }

    return '$baseData = [' . "\n" . $rows . '];';
}
/**
 * Exports locales.
 *
 * Produces the PHP source of a $locales array, preceded by a comment that
 * states the number of locales.
 *
 * @param array $data The locales.
 *
 * @return string The PHP source.
 */
function export_locales($data)
{
    // Wrap the values in single quotes.
    $quoted = [];
    foreach ($data as $key => $value) {
        $quoted[$key] = "'" . $value . "'";
    }

    return '// ' . count($quoted) . " available locales. \n"
        . '$locales = [' . "\n"
        . ' ' . implode(', ', $quoted) . "\n"
        . "];\n\n";
}
/**
 * Generates the base data.
 *
 * Collects the three letter code, numeric code and current currency code of
 * each country found in the english territory list. Numeric territory codes,
 * the hardcoded ignore list and "-alt-" entries are skipped. A country only
 * appears in the result if at least one of the three values was found.
 *
 * @param array $englishData  The english territory names, keyed by code.
 * @param array $codeMappings The CLDR code mappings, keyed by country code.
 * @param array $currencyData The CLDR currency data.
 *
 * @return array The base data, keyed and sorted by country code.
 */
function generate_base_data(array $englishData, array $codeMappings, array $currencyData) {
    $ignoredCountries = [
        'AN', // Netherlands Antilles, no longer exists.
        'EU', 'QO', // European Union, Outlying Oceania. Not countries.
        'XA', 'XB',
        'ZZ', // Unknown region
    ];
    // Target key => key in the CLDR code mappings.
    // Countries are not guaranteed to have an alpha3 and/or numeric code.
    $codeKeys = [
        'three_letter_code' => '_alpha3',
        'numeric_code' => '_numeric',
    ];

    $baseData = [];
    foreach (array_keys($englishData) as $countryCode) {
        // Ignore continents, regions, uninhabited islands.
        $isIgnored = is_numeric($countryCode) || in_array($countryCode, $ignoredCountries);
        // Ignore alternative names.
        if ($isIgnored || strpos($countryCode, '-alt-') !== false) {
            continue;
        }

        foreach ($codeKeys as $targetKey => $mappingKey) {
            if (isset($codeMappings[$countryCode][$mappingKey])) {
                $baseData[$countryCode][$targetKey] = $codeMappings[$countryCode][$mappingKey];
            }
        }

        // Determine the current currency for this country.
        if (!isset($currencyData['region'][$countryCode])) {
            continue;
        }
        $currencies = prepare_currencies($currencyData['region'][$countryCode]);
        if ($currencies) {
            // The currencies are sorted by date, the last one is the current one.
            $currencyCodes = array_keys($currencies);
            $baseData[$countryCode]['currency_code'] = end($currencyCodes);
        }
    }
    ksort($baseData);

    return $baseData;
}
/**
 * Generates the localizations.
 *
 * Reads territories.json of every discovered locale from the global
 * $localeDirectory and keeps the names of the countries present in the
 * base data.
 *
 * @param array $baseData    The base data, keyed by country code.
 * @param array $englishData The english names, used for untranslated entries.
 *
 * @return array The country names, keyed by locale and country code.
 */
function generate_localizations(array $baseData, array $englishData) {
    global $localeDirectory;

    $localizations = [];
    foreach (discover_locales() as $locale) {
        $contents = file_get_contents($localeDirectory . $locale . '/territories.json');
        $decoded = json_decode($contents, true);
        $territories = $decoded['main'][$locale]['localeDisplayNames']['territories'];
        foreach ($territories as $countryCode => $countryName) {
            if (!isset($baseData[$countryCode])) {
                continue;
            }
            // This country name is untranslated, use the english version.
            $isUntranslated = ($countryCode == str_replace('_', '-', $countryName));
            $localizations[$locale][$countryCode] = $isUntranslated ? $englishData[$countryCode] : $countryName;
        }
    }

    return $localizations;
}
/**
 * Filters out duplicate localizations (same as their parent locale).
 *
 * For example, "fr-FR" will be removed if "fr" has the same data.
 *
 * A locale counts as a duplicate when each of its country names matches the
 * name the parent locale has for the same country code.
 *
 * @param array $localizations The country names, keyed by locale and
 *                             country code.
 *
 * @return array The localizations without the duplicates.
 */
function filter_duplicate_localizations(array $localizations) {
    $duplicates = [];
    foreach ($localizations as $locale => $localizedCountries) {
        if ($parentLocale = \CommerceGuys\Addressing\Locale::getParent($locale)) {
            $parentCountries = isset($localizations[$parentLocale]) ? $localizations[$parentLocale] : [];
            // Compare names per country code. array_udiff() would ignore the
            // country codes, and needs a comparator that defines an ordering;
            // one that only reports "equal or not" gives unreliable results.
            $diff = array_diff_assoc($localizedCountries, $parentCountries);
            if (empty($diff)) {
                // The duplicates are not removed right away because they might
                // still be needed for other duplicate checks (for example,
                // when there are locales like bs-Latn-BA, bs-Latn, bs).
                $duplicates[] = $locale;
            }
        }
    }
    foreach ($duplicates as $locale) {
        unset($localizations[$locale]);
    }

    return $localizations;
}
/**
 * Creates a list of available locales.
 *
 * Every entry of the global $localeDirectory counts as a locale, except for
 * entries starting with a dot and the ignored locales.
 *
 * @return array The locales, in the order the directory lists them.
 */
function discover_locales() {
    global $localeDirectory;

    // Locales listed without a "-" match all variants.
    // Locales listed with a "-" match only those exact ones.
    $ignoredLocales = [
        // English is our fallback, we don't need another.
        "und",
        // Esperanto, Interlingua, Volapuk are made up languages.
        "eo", "ia", "vo",
        // Belarus (Classical orthography), Church Slavic, Manx, Prussian are historical.
        "be-tarask", "cu", "gv", "prg",
        // Valencian differs from its parent only by a single character (è/é).
        "ca-ES-valencia",
        // Infrequently used locales.
        "jv", "kn", "row",
    ];

    // Gather available locales.
    $locales = [];
    $handle = opendir($localeDirectory);
    if (!$handle) {
        return $locales;
    }
    while (($entry = readdir($handle)) !== false) {
        if (substr($entry, 0, 1) == '.') {
            continue;
        }
        // The part in front of the first dash is the language.
        $language = explode('-', $entry)[0];
        if (in_array($entry, $ignoredLocales) || in_array($language, $ignoredLocales)) {
            continue;
        }
        $locales[] = $entry;
    }
    closedir($handle);

    return $locales;
}
/**
 * Prepares the currencies for a specific country.
 *
 * Turns the list of single-currency arrays into one array keyed by currency
 * code, drops the currencies marked as non-tender, and sorts the rest by
 * their "_from" date.
 *
 * @param array|null $currencies The currency list of a region.
 *
 * @return array The currencies, keyed by currency code, oldest first.
 */
function prepare_currencies($currencies)
{
    if (empty($currencies)) {
        return [];
    }

    // Rekey the array by currency code.
    foreach ($currencies as $position => $entry) {
        foreach ($entry as $code => $details) {
            $currencies[$code] = $details;
        }
        unset($currencies[$position]);
    }
    // Remove non-tender currencies.
    foreach ($currencies as $code => $details) {
        if (isset($details['_tender']) && $details['_tender'] == 'false') {
            unset($currencies[$code]);
        }
    }
    // Sort by _from date.
    uasort($currencies, 'compare_from_dates');

    return $currencies;
}
/**
 * uasort callback for comparing arrays using their "_from" dates.
 *
 * @param array $a The first array, with a "_from" date string.
 * @param array $b The second array, with a "_from" date string.
 *
 * @return int -1 if $a starts earlier, 1 if it starts later, 0 if both
 *             start at the same time.
 */
function compare_from_dates($a, $b)
{
    $first = new DateTime($a['_from']);
    $second = new DateTime($b['_from']);
    // DateTime objects can be compared using the comparison operators.
    if ($first < $second) {
        return -1;
    }

    return ($first > $second) ? 1 : 0;
}

View File

@@ -0,0 +1,80 @@
<?php
/**
* Google's dataset includes regular expressions for validating postal codes.
* These regular expressions are meant to be consumed by Google's Java library,
* and compatibility with PHP's preg_match is not 100% guaranteed.
* This script performs validation to ensure compatibility.
*/
// Paths are resolved against this script's directory, so the script can be
// invoked from any working directory (like the other generator scripts).
include __DIR__ . '/../vendor/autoload.php';

use CommerceGuys\Addressing\Address;
use CommerceGuys\Addressing\AddressFormat\AddressField;
use CommerceGuys\Addressing\AddressFormat\AddressFormatRepository;
use CommerceGuys\Addressing\Country\CountryRepository;
use CommerceGuys\Addressing\Validator\Constraints\AddressFormatConstraint;
use Symfony\Component\Validator\Validation;

$addressFormatRepository = new AddressFormatRepository();
// A base address that only lacks the country code and postal code, which
// are filled in per example below.
$address = new Address();
$address = $address
    ->withAddressLine1('Address line1')
    ->withAddressLine2('Address line2')
    ->withLocality('Locality');
$validator = Validation::createValidator();

// Create a list of countries for which Google has definitions.
// The endpoint and link format match the ones used by the scripts that
// download and convert the assets/google data.
$foundCountries = ['ZZ'];
$countryRepository = new CountryRepository();
$countries = $countryRepository->getList();
$serviceUrl = 'https://chromium-i18n.appspot.com/ssl-address';
$index = file_get_contents($serviceUrl);
if ($index === false) {
    die("The index could not be fetched from $serviceUrl");
}
foreach ($countries as $countryCode => $countryName) {
    $link = "<a href='/ssl-address/data/{$countryCode}'>";
    // This is still faster than running a file_exists() for each country code.
    if (strpos($index, $link) !== false) {
        $foundCountries[] = $countryCode;
    }
}

foreach ($foundCountries as $countryCode) {
    $addressFormat = $addressFormatRepository->get($countryCode);
    if (!in_array(AddressField::POSTAL_CODE, $addressFormat->getUsedFields())) {
        continue;
    }

    $definition = file_get_contents(__DIR__ . '/assets/google/' . $countryCode . '.json');
    $definition = json_decode($definition, true);
    // If country definition has zip examples, check if they pass validation.
    if (isset($definition['zipex'])) {
        $zipExamples = explode(',', $definition['zipex']);
        $formattedExamples = implode(', ', $zipExamples);
        $address = $address->withCountryCode($countryCode);
        foreach ($zipExamples as $zipExample) {
            if (strpos($zipExample, ':') !== false) {
                // Ignore ranges for now, the non-range examples are enough.
                continue;
            }

            $address = $address->withPostalCode($zipExample);
            $violations = $validator->validate($address, new AddressFormatConstraint());
            foreach ($violations as $violation) {
                if ($violation->getPropertyPath() == '[postalCode]') {
                    $message = $violation->getMessage();
                    $postalCodePattern = $addressFormat->getPostalCodePattern();

                    echo "Error for countrycode '$countryCode' with postal code '$zipExample'.\n";
                    echo "Error: $message\n";
                    echo "Postal code pattern: $postalCodePattern\n";
                    echo "All available postal code examples: $formattedExamples\n\n";
                    // Once we catch an error in a country, don't try other examples.
                    // Three levels: violations, examples, countries.
                    continue 3;
                }
            }
        }
    }
}