Skip to content

Commit

Permalink
Add Modified Hepburn romanji system.
Browse files Browse the repository at this point in the history
  • Loading branch information
zachleigh committed Nov 2, 2015
1 parent 3507ed3 commit 791b5b6
Show file tree
Hide file tree
Showing 17 changed files with 1,231 additions and 273 deletions.
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Find dictionary entries (lemmas) for conjugated words
- Get readings and pronunciations for words
- Build furigana for words
- Convert Japanese to romanji (English lettering)

## Contents
- [Quick Guide](#quick-guide)
Expand Down Expand Up @@ -512,15 +513,24 @@ echo $results->plugin('Furigana'); // Output: <ruby>東京<rt>とうきょう</r

### Romanji

The Romanji plugin converts words from Japanese to romanji (English letters). Currently, only [traditional hepburn](https://en.wikipedia.org/wiki/Hepburn_romanization) romanization is available, but other options are coming soon.

The Romanji plugin converts words from Japanese to romanji (English letters). A few different romanji systems are available and can be set in the config.php file in the 'style' key.
```php
'Romanji' => [
'style' => 'hepburn_modified'
]
```

The plugin supports the following romanji systems:
- [Traditional Hepburn](https://en.wikipedia.org/wiki/Hepburn_romanization): hepburn_traditional
- [Modified Hepburn](https://en.wikipedia.org/wiki/Hepburn_romanization): hepburn_modified

To get romanji for a string, parse it and access it on the LimelightResults object.
```php
$limelight = new Limelight();

$results = $limelight->parse('東京に行きます');

echo $results->plugin('Romanji'); // Output: Toukyou ni ikimasu
echo $results->plugin('Romanji'); // Output: Tōkyō ni ikimasu
```
Strings on the LimelightResults object are space separated.

Expand All @@ -536,7 +546,7 @@ foreach ($results->getNext() as $word) {

// Output
//
// Toukyouniikimasu
// Tōkyōniikimasu
```

Proper nouns are capitalized.
Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "nihongodera/limelight",
"type": "project",
"description": "A php Japanese language text analyzer and parser.",
"keywords": ["japanese", "language", "mecab", "furigana", "kanji"],
"keywords": ["japanese", "language", "mecab", "furigana", "kanji", "romanji", "parse"],
"homepage": "https://github.com/zachleigh/limelight",
"license": "MIT",
"authors": [
Expand Down
15 changes: 1 addition & 14 deletions limelight_console
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,4 @@ $limelight = new Limelight();

$results = $limelight->parse('東京に行きます');

// $word = $results->getByIndex(0);

// $romanji = $word->reading()->toRomanji()->get();

// echo $romanji;

// var_dump($results);

foreach ($results->getNext() as $word) {
echo $word->romanji;
}

// echo $word->romanji;
//
var_dump($results);
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,6 @@
'うぁ'=>'wha', 'いぇ'=>'ye', 'うぉ'=>'who',
''=>'xa', ''=>'xi', ''=>'xu', ''=>'xe', ''=>'xo',
''=>'xka', ''=>'xke', ''=>'xwa',
''=>'.'
''=>'.', ''=>',', ''=>'?', ''=>'!',
'' =>'"', ''=>'"'
];
4 changes: 3 additions & 1 deletion src/Plugins/Library/Romanji/Lib/HepburnTraditional.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<?php

return [
''=>'a', ''=>'i', ''=>'u', ''=>'e', ''=>'o',
''=>'ka', ''=>'ki', ''=>'ku', ''=>'ke', ''=>'ko',
Expand Down Expand Up @@ -43,5 +44,6 @@
'うぁ'=>'wha', 'いぇ'=>'ye', 'うぉ'=>'who',
''=>'xa', ''=>'xi', ''=>'xu', ''=>'xe', ''=>'xo',
''=>'xka', ''=>'xke', ''=>'xwa',
''=>'.'
''=>'.', ''=>',', ''=>'?', ''=>'!',
'' =>'"', ''=>'"'
];
57 changes: 46 additions & 11 deletions src/Plugins/Library/Romanji/Romanji.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use Limelight\Limelight;
use Limelight\Plugins\Plugin;
use Limelight\Plugins\Library\Romanji\Styles\Hepburn;

class Romanji extends Plugin
{
Expand All @@ -15,18 +14,14 @@ class Romanji extends Plugin
*/
public function handle()
{
$options = $this->config->get('Romanji');

$styleClass = 'Limelight\\Plugins\\Library\\Romanji\\Styles\\' . ucfirst($this->underscoreToCamelCase($options['style']));

$style = new $styleClass();
$decorator = $this->makeDecoratorClass();

$romanjiString = '';

foreach ($this->words as $word) {
$hiraganaWord = mb_convert_kana($word->reading, 'c');

$romanjiWord = $style->convert($hiraganaWord, $word);
$romanjiWord = $decorator->convert($hiraganaWord, $word);

$word->setPluginData('Romanji', $romanjiWord);

Expand All @@ -37,17 +32,41 @@ public function handle()
$romanjiString .= $romanjiWord;
}

return ucfirst(trim($romanjiString));
$romanjiString = trim($romanjiString);

return $this->uppercaseFirst($romanjiString);
}

/**
 * Build the romanji style object selected by the 'style' option of the
 * 'Romanji' config section.
 *
 * The option value is camel-cased, its first letter is upper-cased, and the
 * result is looked up as a class under
 * Limelight\Plugins\Library\Romanji\Styles. The class is instantiated with a
 * fresh RomanjiConverter.
 *
 * NOTE(review): no `use` statement for LimelightPluginErrorException is
 * visible in this view of the file; confirm the name resolves from this
 * namespace.
 *
 * @return object Instance of the configured style class.
 *
 * @throws LimelightPluginErrorException When no class exists for the configured style.
 */
private function makeDecoratorClass()
{
    $romanjiConfig = $this->config->get('Romanji');

    $style = $this->underscoreToCamelCase($romanjiConfig['style']);

    $decoratorClass = 'Limelight\\Plugins\\Library\\Romanji\\Styles\\'.ucfirst($style);

    // Bail out early on an unknown style so the happy path stays flat.
    if (!class_exists($decoratorClass)) {
        throw new LimelightPluginErrorException("Style {$style} does not exist. Check config.php file.");
    }

    return new $decoratorClass(new RomanjiConverter());
}

/**
* Make an underscored word camel-case.
*
* @param string $string
* @param string $string
*
* @return string
* @return string
*/
function underscoreToCamelCase($string)
public function underscoreToCamelCase($string)
{
$string = strtolower($string);

Expand All @@ -67,4 +86,20 @@ function underscoreToCamelCase($string)

return $string;
}

/**
 * Multibyte safe ucfirst.
 *
 * Upper-cases the first character of a UTF-8 string and leaves the rest
 * untouched. Unlike ucfirst(), this handles multibyte leading characters
 * such as the macron vowels produced by Hepburn romanization (e.g. "ō").
 *
 * @param string $string UTF-8 encoded text.
 *
 * @return string The input with its first character upper-cased; an empty
 *                string is returned unchanged.
 */
public function uppercaseFirst($string)
{
    // Pin the encoding on every mb_* call. Without it mb_substr() falls back
    // to mb_internal_encoding(), which may not be UTF-8 and would then split
    // a multibyte first character in the middle of its byte sequence.
    $firstChar = mb_substr($string, 0, 1, 'UTF-8');

    $rest = mb_substr($string, 1, null, 'UTF-8');

    return mb_convert_case($firstChar, MB_CASE_UPPER, 'UTF-8').$rest;
}
}
Loading

0 comments on commit 791b5b6

Please sign in to comment.