Skip to content
This repository has been archived by the owner on Dec 11, 2020. It is now read-only.

Added transliterator to email and username #333

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
"phpunit/phpunit": "~4.0",
"squizlabs/php_codesniffer": "~1.5"
},
"suggest": {
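"ext-intl": "Enables ICU-based transliteration of generated usernames and e-mail addresses (a built-in character table is used otherwise)"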
},
"autoload": {
"psr-0": {
"Faker": "src/",
Expand Down
137 changes: 133 additions & 4 deletions src/Faker/Provider/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,144 @@ class Internet extends \Faker\Provider\Base
'http://{{domainName}}/{{slug}}.html',
'https://{{domainName}}/{{slug}}.html',
);

/**
 * Approximates a string with ASCII characters using a built-in lookup table.
 *
 * Used as the fallback when the intl transliterator is not available.
 * Characters that are not in the table are returned unchanged.
 *
 * The replacements are applied by str_replace() in table order, so a later
 * entry also sees the output of earlier ones: typographic apostrophes are
 * first mapped to "'" and the final entry then removes every "'".
 *
 * @param string $string Text to convert.
 *
 * @return string The converted text.
 */
public static function toAscii($string)
{
    $transliterationTable = array(
        // The first and third rows use the single-codepoint ligatures (U+0132 / U+0133):
        // plain two-letter ASCII keys would also rewrite ordinary text such as "bijou".
        'Ĳ' => 'I','Ö' => 'O','Œ' => 'O','Ü' => 'U','ä' => 'a','æ' => 'a',
        'ĳ' => 'i','ö' => 'o','œ' => 'o','ü' => 'u','ß' => 's','ſ' => 's',
        'À' => 'A','Á' => 'A','Â' => 'A','Ã' => 'A','Ä' => 'A','Å' => 'A',
        'Æ' => 'A','Ā' => 'A','Ą' => 'A','Ă' => 'A','Ç' => 'C','Ć' => 'C',
        'Č' => 'C','Ĉ' => 'C','Ċ' => 'C','Ď' => 'D','Đ' => 'D','È' => 'E',
        'É' => 'E','Ê' => 'E','Ë' => 'E','Ē' => 'E','Ę' => 'E','Ě' => 'E',
        'Ĕ' => 'E','Ė' => 'E','Ĝ' => 'G','Ğ' => 'G','Ġ' => 'G','Ģ' => 'G',
        'Ĥ' => 'H','Ħ' => 'H','Ì' => 'I','Í' => 'I','Î' => 'I','Ï' => 'I',
        'Ī' => 'I','Ĩ' => 'I','Ĭ' => 'I','Į' => 'I','İ' => 'I','Ĵ' => 'J',
        // Accented capital L variants map to 'L', matching their lowercase counterparts.
        'Ķ' => 'K','Ľ' => 'L','Ĺ' => 'L','Ļ' => 'L','Ŀ' => 'L','Ł' => 'L',
        'Ñ' => 'N','Ń' => 'N','Ň' => 'N','Ņ' => 'N','Ŋ' => 'N','Ò' => 'O',
        'Ó' => 'O','Ô' => 'O','Õ' => 'O','Ø' => 'O','Ō' => 'O','Ő' => 'O',
        'Ŏ' => 'O','Ŕ' => 'R','Ř' => 'R','Ŗ' => 'R','Ś' => 'S','Ş' => 'S',
        'Ŝ' => 'S','Ș' => 'S','Š' => 'S','Ť' => 'T','Ţ' => 'T','Ŧ' => 'T',
        'Ț' => 'T','Ù' => 'U','Ú' => 'U','Û' => 'U','Ū' => 'U','Ů' => 'U',
        'Ű' => 'U','Ŭ' => 'U','Ũ' => 'U','Ų' => 'U','Ŵ' => 'W','Ŷ' => 'Y',
        'Ÿ' => 'Y','Ý' => 'Y','Ź' => 'Z','Ż' => 'Z','Ž' => 'Z','à' => 'a',
        'á' => 'a','â' => 'a','ã' => 'a','ā' => 'a','ą' => 'a','ă' => 'a',
        'å' => 'a','ç' => 'c','ć' => 'c','č' => 'c','ĉ' => 'c','ċ' => 'c',
        'ď' => 'd','đ' => 'd','è' => 'e','é' => 'e','ê' => 'e','ë' => 'e',
        'ē' => 'e','ę' => 'e','ě' => 'e','ĕ' => 'e','ė' => 'e','ƒ' => 'f',
        'ĝ' => 'g','ğ' => 'g','ġ' => 'g','ģ' => 'g','ĥ' => 'h','ħ' => 'h',
        'ì' => 'i','í' => 'i','î' => 'i','ï' => 'i','ī' => 'i','ĩ' => 'i',
        'ĭ' => 'i','į' => 'i','ı' => 'i','ĵ' => 'j','ķ' => 'k','ĸ' => 'k',
        'ł' => 'l','ľ' => 'l','ĺ' => 'l','ļ' => 'l','ŀ' => 'l','ñ' => 'n',
        'ń' => 'n','ň' => 'n','ņ' => 'n','ŉ' => 'n','ŋ' => 'n','ò' => 'o',
        'ó' => 'o','ô' => 'o','õ' => 'o','ø' => 'o','ō' => 'o','ő' => 'o',
        'ŏ' => 'o','ŕ' => 'r','ř' => 'r','ŗ' => 'r','ś' => 's','š' => 's',
        'ť' => 't','ù' => 'u','ú' => 'u','û' => 'u','ū' => 'u','ů' => 'u',
        'ű' => 'u','ŭ' => 'u','ũ' => 'u','ų' => 'u','ŵ' => 'w','ÿ' => 'y',
        'ý' => 'y','ŷ' => 'y','ż' => 'z','ź' => 'z','ž' => 'z','Α' => 'A',
        'Ά' => 'A','Ἀ' => 'A','Ἁ' => 'A','Ἂ' => 'A','Ἃ' => 'A','Ἄ' => 'A',
        'Ἅ' => 'A','Ἆ' => 'A','Ἇ' => 'A','ᾈ' => 'A','ᾉ' => 'A','ᾊ' => 'A',
        'ᾋ' => 'A','ᾌ' => 'A','ᾍ' => 'A','ᾎ' => 'A','ᾏ' => 'A','Ᾰ' => 'A',
        'Ᾱ' => 'A','Ὰ' => 'A','ᾼ' => 'A','Β' => 'B','Γ' => 'G','Δ' => 'D',
        'Ε' => 'E','Έ' => 'E','Ἐ' => 'E','Ἑ' => 'E','Ἒ' => 'E','Ἓ' => 'E',
        'Ἔ' => 'E','Ἕ' => 'E','Ὲ' => 'E','Ζ' => 'Z','Η' => 'I','Ή' => 'I',
        'Ἠ' => 'I','Ἡ' => 'I','Ἢ' => 'I','Ἣ' => 'I','Ἤ' => 'I','Ἥ' => 'I',
        'Ἦ' => 'I','Ἧ' => 'I','ᾘ' => 'I','ᾙ' => 'I','ᾚ' => 'I','ᾛ' => 'I',
        'ᾜ' => 'I','ᾝ' => 'I','ᾞ' => 'I','ᾟ' => 'I','Ὴ' => 'I','ῌ' => 'I',
        'Θ' => 'T','Ι' => 'I','Ί' => 'I','Ϊ' => 'I','Ἰ' => 'I','Ἱ' => 'I',
        'Ἲ' => 'I','Ἳ' => 'I','Ἴ' => 'I','Ἵ' => 'I','Ἶ' => 'I','Ἷ' => 'I',
        'Ῐ' => 'I','Ῑ' => 'I','Ὶ' => 'I','Κ' => 'K','Λ' => 'L','Μ' => 'M',
        'Ν' => 'N','Ξ' => 'K','Ο' => 'O','Ό' => 'O','Ὀ' => 'O','Ὁ' => 'O',
        'Ὂ' => 'O','Ὃ' => 'O','Ὄ' => 'O','Ὅ' => 'O','Ὸ' => 'O','Π' => 'P',
        'Ρ' => 'R','Ῥ' => 'R','Σ' => 'S','Τ' => 'T','Υ' => 'Y','Ύ' => 'Y',
        'Ϋ' => 'Y','Ὑ' => 'Y','Ὓ' => 'Y','Ὕ' => 'Y','Ὗ' => 'Y','Ῠ' => 'Y',
        'Ῡ' => 'Y','Ὺ' => 'Y','Φ' => 'F','Χ' => 'X','Ψ' => 'P','Ω' => 'O',
        'Ώ' => 'O','Ὠ' => 'O','Ὡ' => 'O','Ὢ' => 'O','Ὣ' => 'O','Ὤ' => 'O',
        'Ὥ' => 'O','Ὦ' => 'O','Ὧ' => 'O','ᾨ' => 'O','ᾩ' => 'O','ᾪ' => 'O',
        'ᾫ' => 'O','ᾬ' => 'O','ᾭ' => 'O','ᾮ' => 'O','ᾯ' => 'O','Ὼ' => 'O',
        'ῼ' => 'O','α' => 'a','ά' => 'a','ἀ' => 'a','ἁ' => 'a','ἂ' => 'a',
        'ἃ' => 'a','ἄ' => 'a','ἅ' => 'a','ἆ' => 'a','ἇ' => 'a','ᾀ' => 'a',
        'ᾁ' => 'a','ᾂ' => 'a','ᾃ' => 'a','ᾄ' => 'a','ᾅ' => 'a','ᾆ' => 'a',
        'ᾇ' => 'a','ὰ' => 'a','ᾰ' => 'a','ᾱ' => 'a','ᾲ' => 'a','ᾳ' => 'a',
        'ᾴ' => 'a','ᾶ' => 'a','ᾷ' => 'a','β' => 'b','γ' => 'g','δ' => 'd',
        'ε' => 'e','έ' => 'e','ἐ' => 'e','ἑ' => 'e','ἒ' => 'e','ἓ' => 'e',
        'ἔ' => 'e','ἕ' => 'e','ὲ' => 'e','ζ' => 'z','η' => 'i','ή' => 'i',
        'ἠ' => 'i','ἡ' => 'i','ἢ' => 'i','ἣ' => 'i','ἤ' => 'i','ἥ' => 'i',
        'ἦ' => 'i','ἧ' => 'i','ᾐ' => 'i','ᾑ' => 'i','ᾒ' => 'i','ᾓ' => 'i',
        'ᾔ' => 'i','ᾕ' => 'i','ᾖ' => 'i','ᾗ' => 'i','ὴ' => 'i','ῂ' => 'i',
        'ῃ' => 'i','ῄ' => 'i','ῆ' => 'i','ῇ' => 'i','θ' => 't','ι' => 'i',
        'ί' => 'i','ϊ' => 'i','ΐ' => 'i','ἰ' => 'i','ἱ' => 'i','ἲ' => 'i',
        'ἳ' => 'i','ἴ' => 'i','ἵ' => 'i','ἶ' => 'i','ἷ' => 'i','ὶ' => 'i',
        'ῐ' => 'i','ῑ' => 'i','ῒ' => 'i','ῖ' => 'i','ῗ' => 'i','κ' => 'k',
        'λ' => 'l','μ' => 'm','ν' => 'n','ξ' => 'k','ο' => 'o','ό' => 'o',
        'ὀ' => 'o','ὁ' => 'o','ὂ' => 'o','ὃ' => 'o','ὄ' => 'o','ὅ' => 'o',
        'ὸ' => 'o','π' => 'p','ρ' => 'r','ῤ' => 'r','ῥ' => 'r','σ' => 's',
        'ς' => 's','τ' => 't','υ' => 'y','ύ' => 'y','ϋ' => 'y','ΰ' => 'y',
        'ὐ' => 'y','ὑ' => 'y','ὒ' => 'y','ὓ' => 'y','ὔ' => 'y','ὕ' => 'y',
        'ὖ' => 'y','ὗ' => 'y','ὺ' => 'y','ῠ' => 'y','ῡ' => 'y','ῢ' => 'y',
        'ῦ' => 'y','ῧ' => 'y','φ' => 'f','χ' => 'x','ψ' => 'p','ω' => 'o',
        'ώ' => 'o','ὠ' => 'o','ὡ' => 'o','ὢ' => 'o','ὣ' => 'o','ὤ' => 'o',
        'ὥ' => 'o','ὦ' => 'o','ὧ' => 'o','ᾠ' => 'o','ᾡ' => 'o','ᾢ' => 'o',
        'ᾣ' => 'o','ᾤ' => 'o','ᾥ' => 'o','ᾦ' => 'o','ᾧ' => 'o','ὼ' => 'o',
        'ῲ' => 'o','ῳ' => 'o','ῴ' => 'o','ῶ' => 'o','ῷ' => 'o','А' => 'A',
        'Б' => 'B','В' => 'V','Г' => 'G','Д' => 'D','Е' => 'E','Ё' => 'E',
        'Ж' => 'Z','З' => 'Z','И' => 'I','Й' => 'I','К' => 'K','Л' => 'L',
        'М' => 'M','Н' => 'N','О' => 'O','П' => 'P','Р' => 'R','С' => 'S',
        'Т' => 'T','У' => 'U','Ф' => 'F','Х' => 'K','Ц' => 'T','Ч' => 'C',
        'Ш' => 'S','Щ' => 'S','Ы' => 'Y','Э' => 'E','Ю' => 'Y','Я' => 'Y',
        // Lowercase Cyrillic maps to lowercase Latin so the letter case of the input is kept.
        'а' => 'a','б' => 'b','в' => 'v','г' => 'g','д' => 'd','е' => 'e',
        'ё' => 'e','ж' => 'z','з' => 'z','и' => 'i','й' => 'i','к' => 'k',
        'л' => 'l','м' => 'm','н' => 'n','о' => 'o','п' => 'p','р' => 'r',
        'с' => 's','т' => 't','у' => 'u','ф' => 'f','х' => 'k','ц' => 't',
        'ч' => 'c','ш' => 's','щ' => 's','ы' => 'y','э' => 'e','ю' => 'y',
        'я' => 'y','ð' => 'd','Ð' => 'D','þ' => 't','Þ' => 'T','ა' => 'a',
        'ბ' => 'b','გ' => 'g','დ' => 'd','ე' => 'e','ვ' => 'v','ზ' => 'z',
        'თ' => 't','ი' => 'i','კ' => 'k','ლ' => 'l','მ' => 'm','ნ' => 'n',
        'ო' => 'o','პ' => 'p','ჟ' => 'z','რ' => 'r','ს' => 's','ტ' => 't',
        'უ' => 'u','ფ' => 'p','ქ' => 'k','ღ' => 'g','ყ' => 'q','შ' => 's',
        'ჩ' => 'c','ც' => 't','ძ' => 'd','წ' => 't','ჭ' => 'c','ხ' => 'k',
        'ჯ' => 'j','ჰ' => 'h','ā' => 'a','ţ' => 't','ʼ' => "'", '̧' => '',
        'ḩ' => 'h','ʼ' => "'",'‘' => "'",'’' => "'",'ừ' => 'u','/' => '',
        'ế' => 'e','ả' => 'a','ị' => 'i','ậ' => 'a','ệ' => 'e','ỉ' => 'i',
        'ồ' => 'o','ề' => 'e','ơ' => 'o','ạ' => 'a','ẵ' => 'a','ư' => 'u',
        'ằ' => 'a','ầ' => 'a','ḑ' => 'd','Ḩ' => 'H','Ḑ' => 'D','ḑ' => 'd',
        'Ģ' => 'G','Š' => 'S','ļ' => 'l','ž' => 'z','Ē' => 'E','ņ' => 'n',
        'Č' => 'C','ș' => 's','ț' => 't', 'ộ' => 'o','ắ' => 'a','ş' => 's',
        // Must stay last: removes the apostrophes produced by the entries above.
        "'" => '',
    );

    return str_replace(array_keys($transliterationTable), array_values($transliterationTable), $string);
}

/**
 * Reduces a string to lowercase word characters and dots.
 *
 * Uses the intl transliterator when the function exists and succeeds;
 * otherwise falls back to the built-in table in toAscii(). Both paths are
 * then normalised the same way, so the result does not depend on whether
 * ext-intl is installed.
 *
 * @param string $string Text to convert.
 *
 * @return string The converted text.
 */
private static function transliterate($string)
{
    $ascii = false;

    if (function_exists('transliterator_transliterate')) {
        $ascii = transliterator_transliterate("Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC; Lower();", $string);
    }

    // transliterator_transliterate() returns false on failure; use the table in that case too.
    if (!is_string($ascii)) {
        $ascii = static::toAscii($string);
    }

    // Drop every non-word character except the dot (whitespace, punctuation, leftover bytes).
    $ascii = preg_replace('/(?!\.)\W/', '', $ascii);

    // The table fallback keeps letter case; the intl rule set already lower-cased its output.
    return strtolower($ascii);
}

/**
 * Transliterates an e-mail address while keeping its '@' separators.
 *
 * The address is split on '@' and each part is passed through
 * transliterate() separately, because transliterate() would otherwise
 * strip the '@' as a non-word character.
 *
 * @param string $email Address to convert.
 *
 * @return string The converted address.
 */
private static function transliterateEmail($email)
{
    $emailParts = explode('@', $email);

    // Iterate instead of indexing [0] and [1] so that input without an '@'
    // does not read an undefined offset and no part is left unconverted.
    foreach ($emailParts as $index => $part) {
        $emailParts[$index] = static::transliterate($part);
    }

    return implode('@', $emailParts);
}
/**
 * Generates an e-mail address from a randomly chosen entry of static::$emailFormats.
 *
 * Whitespace is removed from the parsed format and the result is then
 * transliterated part by part around the '@'.
 *
 * @example 'jdoe@acme.biz'
 *
 * @return string
 */
public function email()
{
    $format = static::randomElement(static::$emailFormats);

    // A single return: previously the whitespace-stripping return came first
    // and made the transliterating return unreachable.
    $email = preg_replace('/\s/u', '', $this->generator->parse($format));

    return static::transliterateEmail($email);
}

/**
Expand Down Expand Up @@ -91,10 +220,10 @@ final public static function safeEmailDomain()
public function userName()
{
    // Pick a random format, expand it through the generator, then let bothify() fill its placeholders.
    $format = static::randomElement(static::$userNameFormats);
    $username = static::bothify($this->generator->parse($format));

    // A single return: previously the lower-casing return came first and made
    // the transliterating return unreachable, leaving $username unused.
    $username = preg_replace('/\s/u', '', static::transliterate($username));

    return static::toLower($username);
}

/**
* @example 'tiramisu.com'
*/
Expand Down
23 changes: 0 additions & 23 deletions src/Faker/Provider/bg_BG/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,4 @@ class Internet extends \Faker\Provider\Internet
{
protected static $freeEmailDomain = array('gmail.com', 'yahoo.com', 'hotmail.com', 'mail.bg', 'abv.bg', 'dir.bg');
protected static $tld = array('bg', 'bg', 'bg', 'bg', 'bg', 'bg', 'com', 'biz', 'info', 'net', 'org');

/**
 * Builds a user name from a randomly chosen entry of static::$userNameFormats.
 *
 * The parsed format is passed through bothify() and returned as is.
 *
 * @example 'jdoe'
 */
public function userName()
{
    $pattern = static::randomElement(static::$userNameFormats);
    $parsed = $this->generator->parse($pattern);

    return static::bothify($parsed);
}

/**
 * Derives a domain word from the first space-separated word of a generated company name.
 *
 * Non-word characters are removed from that word (Unicode-aware match).
 *
 * @example 'faber'
 */
public function domainWord()
{
    list($firstWord) = explode(' ', $this->generator->format('company'));

    return preg_replace('/\W/u', '', $firstWord);
}
}
25 changes: 0 additions & 25 deletions src/Faker/Provider/cs_CZ/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,6 @@

class Internet extends \Faker\Provider\Internet
{
    protected static $freeEmailDomain = array('gmail.com', 'yahoo.com', 'seznam.cz', 'atlas.cz', 'centrum.cz', 'email.cz', 'post.cz');

    protected static $tld = array('cz', 'cz', 'cz', 'cz', 'cz', 'cz', 'com', 'info', 'net', 'org');

    /**
     * Replaces Czech accented letters with their unaccented ASCII counterparts.
     *
     * @param string $string Text to convert.
     *
     * @return string
     */
    private function toAscii($string)
    {
        // Each accented letter is paired with its replacement.
        $map = array(
            'Ě' => 'E', 'ě' => 'e', 'Š' => 'S', 'š' => 's', 'Č' => 'C', 'č' => 'c',
            'Ř' => 'R', 'ř' => 'r', 'Ž' => 'Z', 'ž' => 'z', 'Ý' => 'Y', 'ý' => 'y',
            'Á' => 'A', 'á' => 'a', 'Í' => 'I', 'í' => 'i', 'É' => 'E', 'é' => 'e',
            'Ó' => 'O', 'ó' => 'o', 'Ú' => 'U', 'ú' => 'u', 'Ů' => 'U', 'ů' => 'u',
            'Ď' => 'D', 'ď' => 'd', 'Ť' => 'T', 'ť' => 't', 'Ň' => 'N', 'ň' => 'n',
        );

        return str_replace(array_keys($map), array_values($map), $string);
    }

    /**
     * Returns the parent's e-mail address with Czech letters converted to ASCII.
     *
     * @return string
     */
    public function email()
    {
        $email = parent::email();

        return $this->toAscii($email);
    }

    /**
     * Returns the parent's user name with Czech letters converted to ASCII.
     *
     * @return string
     */
    public function userName()
    {
        $userName = parent::userName();

        return $this->toAscii($userName);
    }
}
38 changes: 0 additions & 38 deletions src/Faker/Provider/da_DK/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,42 +27,4 @@ class Internet extends \Faker\Provider\Internet
protected static $tld = array(
'com', 'com', 'com', 'biz', 'info', 'net', 'org', 'dk', 'dk', 'dk',
);

/**
 * Spells out the Danish letters æ, ø and å (both cases) as two-letter ASCII sequences.
 *
 * @param string $string Text to convert.
 *
 * @return string
 */
private static function toAscii($string)
{
    $map = array(
        'æ' => 'ae',
        'ø' => 'oe',
        'å' => 'aa',
        'Æ' => 'AE',
        'Ø' => 'OE',
        'Å' => 'AA',
    );

    return str_replace(array_keys($map), array_values($map), $string);
}

/**
 * Builds a lower-cased ASCII user name from a randomly chosen entry of static::$userNameFormats.
 *
 * @example 'jeppe'
 * @return string
 */
public function userName()
{
    $pattern = static::randomElement(static::$userNameFormats);
    $raw = static::bothify($this->generator->parse($pattern));
    $ascii = static::toAscii($raw);

    return static::toLower($ascii);
}

/**
 * Derives a lower-cased ASCII domain word from the first space-separated word
 * of a generated company name, with non-word characters removed.
 *
 * @example 'jensen'
 * @return string
 */
public function domainWord()
{
    list($firstWord) = explode(' ', $this->generator->format('company'));
    $stripped = preg_replace('/\W/u', '', $firstWord);
    $ascii = static::toAscii($stripped);

    return static::toLower($ascii);
}
}
36 changes: 0 additions & 36 deletions src/Faker/Provider/de_DE/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,4 @@ class Internet extends \Faker\Provider\Internet
{
protected static $freeEmailDomain = array('web.de', 'gmail.com', 'hotmail.de', 'yahoo.de', 'googlemail.com', 'aol.de', 'gmx.de');
protected static $tld = array('com', 'com', 'com', 'net', 'org', 'de', 'de', 'de');

/**
 * Converts German umlauts and the sharp s to their ASCII representation.
 *
 * Umlauts lose their diaeresis (ä becomes a) and ß becomes ss.
 *
 * @param string $string Text to convert.
 *
 * @return string
 */
private static function toAscii($string)
{
    $map = array(
        'ä' => 'a',
        'Ä' => 'A',
        'ü' => 'u',
        'Ü' => 'U',
        'ö' => 'o',
        'Ö' => 'O',
        'ß' => 'ss',
    );

    return str_replace(array_keys($map), array_values($map), $string);
}

/**
 * Builds a lower-cased ASCII user name from a randomly chosen entry of static::$userNameFormats.
 *
 * @example 'jdoe'
 */
public function userName()
{
    $pattern = static::randomElement(static::$userNameFormats);
    $raw = static::bothify($this->generator->parse($pattern));

    return static::toLower(static::toAscii($raw));
}

/**
 * Derives a lower-cased ASCII domain word from the first space-separated word
 * of a generated company name, with non-word characters removed.
 *
 * @example 'faber'
 */
public function domainWord()
{
    list($firstWord) = explode(' ', $this->generator->format('company'));
    $word = static::toAscii(preg_replace('/\W/u', '', $firstWord));

    return static::toLower($word);
}
}
35 changes: 0 additions & 35 deletions src/Faker/Provider/es_ES/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,39 +6,4 @@ class Internet extends \Faker\Provider\Internet
{
protected static $freeEmailDomain = array('gmail.com', 'hotmail.com', 'hotmail.es', 'yahoo.com', 'yahoo.es', 'live.com', 'hispavista.com', 'latinmail.com', 'terra.com');
protected static $tld = array('com', 'com', 'com', 'com', 'net', 'org', 'org', 'es', 'es', 'es', 'com.es');

/**
 * Replaces Spanish accented vowels, ü and ñ (both cases) with their plain ASCII letters.
 *
 * @param string $string Text to convert.
 *
 * @return string
 */
private static function toAscii($string)
{
    $map = array(
        'á' => 'a', 'Á' => 'A',
        'é' => 'e', 'É' => 'E',
        'í' => 'i', 'Í' => 'I',
        'ó' => 'o', 'Ó' => 'O',
        'ú' => 'u', 'Ú' => 'U',
        'ü' => 'u', 'Ü' => 'U',
        'ñ' => 'n', 'Ñ' => 'N',
    );

    return str_replace(array_keys($map), array_values($map), $string);
}

/**
 * Builds a lower-cased ASCII user name in which hyphens are replaced by dots.
 *
 * @example 'alex.iglesias'
 */
public function userName()
{
    $pattern = static::randomElement(static::$userNameFormats);
    $raw = static::bothify($this->generator->parse($pattern));
    $normalised = static::toLower(static::toAscii($raw));

    return str_replace('-', '.', $normalised);
}

/**
 * Derives a lower-cased ASCII domain word from the first space-separated word
 * of a generated company name, with non-word characters removed.
 *
 * @example 'lovato-exposito'
 */
public function domainWord()
{
    $words = explode(' ', $this->generator->format('company'));
    $stripped = preg_replace('/\W/u', '', $words[0]);
    $ascii = static::toAscii($stripped);

    return static::toLower($ascii);
}
}
36 changes: 0 additions & 36 deletions src/Faker/Provider/fr_BE/Internet.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,4 @@ class Internet extends \Faker\Provider\Internet
{
protected static $freeEmailDomain = array('gmail.com', 'hotmail.com', 'yahoo.com', 'advalvas.be');
protected static $tld = array('com', 'net', 'org', 'be');

/**
 * Converts French accented characters to their ASCII representation.
 *
 * Letter case is preserved: an uppercase accented letter maps to the
 * corresponding uppercase ASCII letter.
 *
 * @param string $string Text to convert.
 *
 * @return string
 */
private static function toAscii($string)
{
    // Keeping each letter next to its replacement avoids the misalignment of two
    // parallel lists that previously mapped 'Ç' to a lowercase 'c'.
    $map = array(
        'à' => 'a', 'À' => 'A',
        'ç' => 'c', 'Ç' => 'C',
        'é' => 'e', 'É' => 'E',
        'è' => 'e', 'È' => 'E',
        'ë' => 'e', 'Ë' => 'E',
        'ï' => 'i', 'Ï' => 'I',
        'î' => 'i', 'Î' => 'I',
        'ô' => 'o', 'Ô' => 'O',
        'ù' => 'u', 'Ù' => 'U',
    );

    return str_replace(array_keys($map), array_values($map), $string);
}

/**
 * Builds a lower-cased ASCII user name from a randomly chosen entry of static::$userNameFormats.
 *
 * @example 'jdoe'
 */
public function userName()
{
    $pattern = static::randomElement(static::$userNameFormats);
    $parsed = $this->generator->parse($pattern);
    $ascii = static::toAscii(static::bothify($parsed));

    return static::toLower($ascii);
}

/**
 * Derives a lower-cased ASCII domain word from the first space-separated word
 * of a generated company name, with non-word characters removed.
 *
 * @example 'faber'
 */
public function domainWord()
{
    list($firstWord) = explode(' ', $this->generator->format('company'));
    $stripped = preg_replace('/\W/u', '', $firstWord);

    return static::toLower(static::toAscii($stripped));
}
}
Loading