-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fc3aafc
commit e1ee96a
Showing
11 changed files
with
197 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<?php | ||
|
||
namespace LLPhant\Embeddings\DocumentStore; | ||
|
||
use LLPhant\Embeddings\Document; | ||
|
||
/**
 * Contract for a store that can return the chunks of a source document by their position.
 */
interface DocumentStore
{
    /**
     * Fetches the documents of a single source whose chunk numbers lie between two indices.
     *
     * NOTE(review): the bounds look inclusive on both ends, judging by how the sibling
     * transformer in this changeset computes them — confirm against the concrete stores.
     *
     * @param  string  $sourceType  type of the source the chunks belong to
     * @param  string  $sourceName  name of the source the chunks belong to
     * @param  int  $leftIndex  lowest chunk number to fetch
     * @param  int  $rightIndex  highest chunk number to fetch
     * @return Document[]
     */
    public function fetchDocumentsByChunkRange(
        string $sourceType,
        string $sourceName,
        int $leftIndex,
        int $rightIndex,
    ): array;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
<?php | ||
|
||
namespace LLPhant\Query\SemanticSearch; | ||
|
||
/**
 * Chains several document transformers: the output of each one becomes the input of the next.
 */
class PipeDocumentsTransformer implements RetrievedDocumentsTransformer
{
    /**
     * Transformers in the order they are applied.
     *
     * @var RetrievedDocumentsTransformer[]
     */
    private readonly array $transformers;

    /**
     * @param  RetrievedDocumentsTransformer  ...$transformers  stages of the pipeline, applied first to last
     */
    public function __construct(RetrievedDocumentsTransformer ...$transformers)
    {
        $this->transformers = $transformers;
    }

    /**
     * {@inheritDoc}
     *
     * Every stage receives the same questions; with no stages configured the
     * retrieved documents are returned as they were passed in.
     */
    public function transformDocuments(array $questions, array $retrievedDocs): array
    {
        // Fold the document list through each stage, seeding the fold with the retrieved documents.
        return \array_reduce(
            $this->transformers,
            static fn (array $carry, RetrievedDocumentsTransformer $stage): array => $stage->transformDocuments($questions, $carry),
            $retrievedDocs,
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
<?php | ||
|
||
namespace LLPhant\Query\SemanticSearch; | ||
|
||
use LLPhant\Embeddings\Document; | ||
use LLPhant\Embeddings\DocumentStore\DocumentStore; | ||
|
||
/**
 * Replaces each retrieved document with a window of neighbouring chunks from the same
 * source, fetched from a DocumentStore.
 */
class SiblingsDocumentTransformer implements RetrievedDocumentsTransformer
{
    /**
     * @param  DocumentStore  $documentStore  store queried for the chunks around each retrieved document
     * @param  int  $nrOfSiblings  number of chunks requested per retrieved document; must be greater than zero
     */
    public function __construct(private readonly DocumentStore $documentStore, private readonly int $nrOfSiblings)
    {
    }

    /**
     * {@inheritDoc}
     *
     * For every retrieved document a range of chunk numbers is computed and the matching
     * documents are fetched from the store. When the ranges of two retrieved documents
     * overlap, each chunk is returned only once, in the order it was first encountered.
     *
     * @throws \InvalidArgumentException when a chunk number is negative or the configured
     *                                   number of siblings is not greater than zero
     */
    public function transformDocuments(array $questions, array $retrievedDocs): array
    {
        /** @var Document[] $extraDocs */
        $extraDocs = [];
        // Chunks already collected, indexed by source type, source name and chunk number.
        /** @var array<array<array<true>>> $seen */
        $seen = [];

        foreach ($retrievedDocs as $retrievedDoc) {
            [$leftIndex, $rightIndex] = $this->getIndices($retrievedDoc->chunkNumber, $this->nrOfSiblings);
            $siblings = $this->documentStore->fetchDocumentsByChunkRange(
                $retrievedDoc->sourceType,
                $retrievedDoc->sourceName,
                $leftIndex,
                $rightIndex,
            );

            foreach ($siblings as $sibling) {
                // Neighbouring hits from the same source produce overlapping windows;
                // skip chunks that an earlier window already contributed.
                if (isset($seen[$sibling->sourceType][$sibling->sourceName][$sibling->chunkNumber])) {
                    continue;
                }

                $seen[$sibling->sourceType][$sibling->sourceName][$sibling->chunkNumber] = true;
                $extraDocs[] = $sibling;
            }
        }

        return $extraDocs;
    }

    /**
     * Computes the first and last chunk number of a window of $numElements chunks around $position.
     *
     * The window is roughly centred on $position. Near the start of a source, where fewer
     * chunks exist on the left, the unused part of the window is added on the right, so the
     * window always spans $numElements chunk numbers and never starts below zero.
     *
     * @return int[] two elements: the left index and the right index
     *
     * @throws \InvalidArgumentException when $position is negative or $numElements is not greater than zero
     */
    private function getIndices(int $position, int $numElements): array
    {
        // A position of 0 is accepted; only negative positions are rejected.
        if ($position < 0 || $numElements <= 0) {
            throw new \InvalidArgumentException('Both position and numElements must be positive integers.');
        }

        // Chunks wanted on the left, capped so the left index cannot go below zero.
        $halfDistance = min($position, intdiv($numElements - 1, 2));
        $leftIndex = $position - $halfDistance;
        // Whatever was not used on the left is spent on the right.
        $rightIndex = $position + ($numElements - 1 - $halfDistance);

        return [$leftIndex, $rightIndex];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
tests/Integration/Query/SemanticSearch/SiblingsDocumentTransformerTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Tests\Integration\Query\SemanticSearch; | ||
|
||
// Placeholder: the integration test for this namespace has not been written yet.
it('Todo: write this test', function (): void {
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
tests/Unit/Query/SemanticSearch/PipeDocumentTransformerTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Tests\Unit\Query\SemanticSearch; | ||
|
||
use LLPhant\Query\SemanticSearch\PipeDocumentsTransformer; | ||
use LLPhant\Query\SemanticSearch\RetrievedDocumentsTransformer; | ||
use Tests\Fixtures\DocumentFixtures; | ||
|
||
/**
 * Builds a test transformer that appends a space and the given color to the content of every document.
 */
function transformer(string $color): RetrievedDocumentsTransformer
{
    return new class($color) implements RetrievedDocumentsTransformer
    {
        public function __construct(private readonly string $color)
        {
        }

        /**
         * Tags each document in place and hands the same list back.
         */
        public function transformDocuments(array $questions, array $retrievedDocs): array
        {
            $suffix = ' '.$this->color;

            // Documents are objects, so appending to their content here changes the originals.
            \array_walk($retrievedDocs, static function ($document) use ($suffix): void {
                $document->content .= $suffix;
            });

            return $retrievedDocs;
        }
    };
}
|
||
// Each stage appends its color, so the final content shows the order the stages ran in.
it('can pipe transformations', function () {
    $pipeline = new PipeDocumentsTransformer(
        transformer('green'),
        transformer('white'),
        transformer('red'),
    );

    $result = $pipeline->transformDocuments(
        ['sample'],
        DocumentFixtures::documents('one', 'two', 'three'),
    );

    expect($result[0]->content)->toBe('one green white red');
    expect($result[1]->content)->toBe('two green white red');
    expect($result[2]->content)->toBe('three green white red');
});
36 changes: 36 additions & 0 deletions
36
tests/Unit/Query/SemanticSearch/SiblingsDocumentTransformerTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Tests\Unit\Query\SemanticSearch; | ||
|
||
use LLPhant\Embeddings\Document; | ||
use LLPhant\Embeddings\DocumentStore\DocumentStore; | ||
use LLPhant\Query\SemanticSearch\SiblingsDocumentTransformer; | ||
use Tests\Fixtures\DocumentFixtures; | ||
|
||
/**
 * Builds an in-memory DocumentStore stub that fabricates one fixture chunk for every
 * index in the requested range, both bounds included.
 */
function documentStore(): DocumentStore
{
    return new class implements DocumentStore
    {
        public function fetchDocumentsByChunkRange(string $sourceType, string $sourceName, int $leftIndex, int $rightIndex): array
        {
            // range() would count downwards for an inverted range, so return nothing in that case.
            if ($leftIndex > $rightIndex) {
                return [];
            }

            /** @var Document[] $chunks */
            $chunks = \array_map(
                static fn (int $chunkNumber) => DocumentFixtures::documentChunk($chunkNumber, $sourceType, $sourceName),
                \range($leftIndex, $rightIndex),
            );

            return $chunks;
        }
    };
}
|
||
// Chunk 7 with a window of 20 cannot be centred, so the window is expected to cover chunks 0..19.
it('can extract right data from document store', function () {
    $retrieved = [DocumentFixtures::documentChunk(7, 'txt', 'test')];
    $siblingsTransformer = new SiblingsDocumentTransformer(documentStore(), 20);

    $result = $siblingsTransformer->transformDocuments(['Sample question'], $retrieved);

    expect(count($result))->toBe(20);
    expect($result[0]->chunkNumber)->toBe(0);
    expect($result[19]->chunkNumber)->toBe(19);
});