Skip to content

Commit

Permalink
Add noParse method for better kana romanji.
Browse files Browse the repository at this point in the history
  • Loading branch information
zachleigh committed Dec 4, 2015
1 parent 67a531f commit 4799fb5
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 60 deletions.
62 changes: 62 additions & 0 deletions src/Helpers/PluginHelper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

namespace Limelight\Helpers;

use Limelight\Config\Config;
use Limelight\Plugins\Plugin;

trait PluginHelper
{
    /**
     * Run all registered plugins.
     *
     * Iterates the key => class-name pairs returned by Config::getPlugins(),
     * instantiates each plugin class with the arguments given here and
     * collects whatever each plugin's handle() method returns.
     *
     * @param string    $text
     * @param Node|null $node
     * @param array     $tokens
     * @param array     $words
     *
     * @throws \Limelight\Exceptions\PluginNotFoundException If a registered plugin class does not exist.
     *
     * @return array Plugin results, keyed by the plugin's key in the config.
     */
    protected function runPlugins($text, $node, $tokens, $words)
    {
        $pluginResults = [];

        $plugins = Config::getInstance()->getPlugins();

        foreach ($plugins as $plugin => $namespace) {
            // Fail before instantiation so a missing class surfaces as a
            // PluginNotFoundException rather than a fatal error.
            $this->validatePlugin($namespace);

            $pluginClass = new $namespace($text, $node, $tokens, $words);

            $pluginResults[$plugin] = $this->firePlugin($pluginClass);
        }

        return $pluginResults;
    }

    /**
     * Validate plugin class exists.
     *
     * @param string $namespace Class name of the plugin.
     *
     * @throws \Limelight\Exceptions\PluginNotFoundException If the class cannot be found.
     */
    private function validatePlugin($namespace)
    {
        if (!class_exists($namespace)) {
            // Fully qualified on purpose: this file does not import the
            // exception, and an unqualified name would resolve relative to
            // the Limelight\Helpers namespace.
            throw new \Limelight\Exceptions\PluginNotFoundException("Plugin {$namespace} not found.");
        }
    }

    /**
     * Fire the plugin.
     *
     * @param Plugin $plugin
     *
     * @return mixed Whatever the plugin's handle() method returns.
     */
    private function firePlugin(Plugin $plugin)
    {
        return $plugin->handle();
    }
}
15 changes: 15 additions & 0 deletions src/Limelight.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Limelight\Parse\Parser;
use Limelight\Config\Config;
use Limelight\Parse\NoParser;
use Limelight\Parse\Tokenizer;
use Limelight\Parse\TokenParser;

Expand Down Expand Up @@ -45,6 +46,20 @@ public function parse($text, $runPlugins = true)
return $parser->handle($text, $runPlugins);
}

/**
 * Send the given text through the plugins while skipping MeCab parsing.
 *
 * The work is delegated to NoParser::handle(). Kanji input will fail.
 *
 * @param string $text
 *
 * @throws \Limelight\Exceptions\InvalidInputException If the text contains kanji.
 *
 * @return \Limelight\Classes\LimelightResults
 */
public function noParse($text)
{
    return (new NoParser())->handle($text);
}

/**
* MeCab parseToNode method. Returns native Limelight node object.
*
Expand Down
75 changes: 75 additions & 0 deletions src/Parse/NoParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
<?php

namespace Limelight\Parse;

use Limelight\Limelight;
use Limelight\Helpers\Converter;
use Limelight\Helpers\PluginHelper;
use Limelight\Classes\LimelightWord;
use Limelight\Helpers\JapaneseHelpers;
use Limelight\Classes\LimelightResults;
use Limelight\Exceptions\InvalidInputException;

class NoParser
{
    use PluginHelper;
    use JapaneseHelpers;

    /**
     * Wrap kanji-free text in a single word and run the plugins on it.
     *
     * No MeCab parsing takes place: the whole input becomes one
     * LimelightWord whose token fields all hold the raw text.
     *
     * @param string $text
     *
     * @throws InvalidInputException When hasKanji() reports kanji in the text.
     *
     * @return LimelightResults
     */
    public function handle($text)
    {
        if ($this->hasKanji($text)) {
            throw new InvalidInputException('Text must not contain kanji.');
        }

        $converter = new Converter(new Limelight());

        $rawToken = $this->buildToken($text);

        $word = new LimelightWord($rawToken, $this->buildProperties(), $converter);

        $wordList = [$word];

        // There is no MeCab node in this code path, so null takes its place.
        $fromPlugins = $this->runPlugins($text, null, $rawToken, $wordList);

        return new LimelightResults($text, $wordList, $fromPlugins);
    }

    /**
     * Create a token in which every field carries the unmodified text.
     *
     * @param string $text
     *
     * @return array Keys: literal, lemma, reading, pronunciation.
     */
    private function buildToken($text)
    {
        $fields = ['literal', 'lemma', 'reading', 'pronunciation'];

        return array_fill_keys($fields, $text);
    }

    /**
     * Create the properties array with every entry set to null.
     *
     * @return array Keys: partOfSpeech, grammar.
     */
    private function buildProperties()
    {
        return array_fill_keys(['partOfSpeech', 'grammar'], null);
    }
}
59 changes: 3 additions & 56 deletions src/Parse/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
namespace Limelight\Parse;

use Limelight\Mecab\Mecab;
use Limelight\Config\Config;
use Limelight\Plugins\Plugin;
use Limelight\Helpers\PluginHelper;
use Limelight\Classes\LimelightResults;
use Limelight\Exceptions\PluginNotFoundException;

class Parser
{
use PluginHelper;

/**
* @var implements Limelight\Mecab\Mecab
*/
Expand Down Expand Up @@ -59,57 +59,4 @@ public function handle($text, $runPlugins)

return new LimelightResults($text, $words, $pluginResults);
}

/**
 * Execute every plugin registered in the config.
 *
 * Each plugin class is validated, instantiated with the arguments given
 * here and fired; the return values are gathered under the plugin's key.
 *
 * @param string $text
 * @param Node   $node
 * @param array  $tokens
 * @param array  $words
 *
 * @return array
 */
private function runPlugins($text, $node, $tokens, $words)
{
    $results = [];

    foreach (Config::getInstance()->getPlugins() as $key => $class) {
        $this->validatePlugin($class);

        $instance = new $class($text, $node, $tokens, $words);

        $results[$key] = $this->firePlugin($instance);
    }

    return $results;
}

/**
 * Ensure the plugin class can be found.
 *
 * @param string $namespace Class name of the plugin.
 *
 * @throws PluginNotFoundException When no class with that name exists.
 */
private function validatePlugin($namespace)
{
    if (class_exists($namespace)) {
        return;
    }

    throw new PluginNotFoundException("Plugin {$namespace} not found.");
}

/**
 * Invoke the plugin and hand back its result.
 *
 * @param Plugin $plugin
 *
 * @return mixed Whatever the plugin's handle() method returns.
 */
private function firePlugin(Plugin $plugin)
{
    $outcome = $plugin->handle();

    return $outcome;
}
}
28 changes: 24 additions & 4 deletions src/Parse/TokenParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class TokenParser
/**
* Parse the text by filtering through the tokens.
*
* @return [type] [description]
* @return array
*/
public function parseTokens($tokens)
{
Expand Down Expand Up @@ -97,6 +97,26 @@ private function getProperties($registry, $previousWord, $previous, $current, $n
return $properties;
}

/**
 * Fill in lemma, reading and pronunciation from the token's literal.
 *
 * The lemma becomes the literal itself; reading and pronunciation both
 * receive the literal passed through mb_convert_kana() in 'C' mode
 * (hiragana to katakana).
 *
 * @param array $current Token; its 'literal' entry is read.
 *
 * @return array The token with the three fields overwritten.
 */
private function updateCurrent($current)
{
    $literal = $current['literal'];

    $asKatakana = mb_convert_kana($literal, 'C');

    $overrides = [
        'lemma' => $literal,
        'reading' => $asKatakana,
        'pronunciation' => $asKatakana,
    ];

    foreach ($overrides as $field => $value) {
        $current[$field] = $value;
    }

    return $current;
}

/**
* Append current word to last word in words array.
*
Expand Down Expand Up @@ -126,9 +146,9 @@ private function appendWordToLast($current, $properties, $previousWord)
/**
* Make new word and append it to words array.
*
* @param array $current
* @param array $properties
* @param Converter $converter
* @param array $current
* @param array $properties
* @param Converter $converter
*/
private function makeNewWord($current, $properties, Converter $converter)
{
Expand Down
2 changes: 2 additions & 0 deletions src/Plugins/Library/Romanji/Romanji.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ public function handle()
$romanjiString = '';

foreach ($this->words as $word) {
$spaces = true;

$hiraganaWord = mb_convert_kana($word->reading, 'c');

$romanjiWord = $style->handle($hiraganaWord, $word);
Expand Down
58 changes: 58 additions & 0 deletions tests/Parse/NoParseTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<?php

namespace Limelight\Tests\Classes;

use Limelight\Limelight;
use Limelight\Tests\TestCase;

class NoParseTest extends TestCase
{
    /**
     * Limelight instance shared by every test in this class.
     *
     * @var \Limelight\Limelight
     */
    protected static $limelight;

    /**
     * Set static limelight on object.
     */
    public static function setUpBeforeClass()
    {
        self::$limelight = new Limelight();
    }

    /**
     * It parses kana text.
     *
     * @test
     */
    public function it_parses_kana_text()
    {
        $results = self::$limelight->noParse('できるかな。。。');

        $this->assertEquals('できるかな。。。', $results->words());
    }

    /**
     * It gets romanji for kana text.
     *
     * @test
     */
    public function it_gets_romanji_for_kana_text()
    {
        $results = self::$limelight->noParse('ねんがっぴ');

        $this->assertEquals('Nengappi', $results->plugin('romanji'));
    }

    /**
     * It throws exception for kanji input.
     *
     * @test
     * @expectedException Limelight\Exceptions\InvalidInputException
     * @expectedExceptionMessage Text must not contain kanji.
     */
    public function it_throws_exception_for_kanji_text()
    {
        // The call is expected to throw, so its return value is not kept.
        self::$limelight->noParse('今日');
    }
}

0 comments on commit 4799fb5

Please sign in to comment.