Skip to content

Commit

Permalink
Merge branch 'feature/open-embeddings-cli-command' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
dkotter committed Jun 30, 2023
2 parents 8ecedc7 + d06404b commit 81b6f9b
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 2 deletions.
41 changes: 41 additions & 0 deletions hookdocs/wp-cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,47 @@ The following WP-CLI commands are supported by ClassifAI:
* `true` to run in dry-run mode
* `false` to run in normal mode

* `wp classifai embeddings <post_ids> [--post_type=<post_type>] [--post_status=<post_status>] [--per_page=<int>] [--dry-run=<bool>]`

Batch classification of items using the OpenAI Embeddings API.

* `<post_ids>`: A comma-delimited list of post IDs to classify. Used if `post_type` is `false` or absent.

default: `null`

* `[--post_type=<post_type>]`: Batch process items belonging to this post type. If `false` or absent, will rely on `post_ids`.

default: `false`

options:

* any post type name

* `[--post_status=<post_status>]`: Batch process items that have this post status. Defaults to `publish`.

default: `publish`

options:

* any post status name

* `[--per_page=<int>]`: How many items should be processed at a time. Will still process all items but will do it in batches matching this number. Defaults to 100.

default: `100`

options:

* N, max number of items to process at a time

* `[--dry-run=<bool>]`: Whether to run as a dry-run. Defaults to `true`, so will run in dry-run mode unless this is set to `false`.

default: `true`

options:

* `true` to run in dry-run mode
* `false` to run in normal mode

### Image Processing Commands

* `wp classifai image <attachment_ids> [--limit=<int>] [--skip=<skip>] [--force]`
Expand Down
158 changes: 158 additions & 0 deletions includes/Classifai/Command/ClassifaiCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use Classifai\Providers\Azure\ComputerVision;
use Classifai\Providers\Azure\SmartCropping;
use Classifai\Providers\Azure\TextToSpeech;
use Classifai\Providers\OpenAI\Embeddings;

/**
* ClassifaiCommand is the command line interface of the ClassifAI plugin.
Expand Down Expand Up @@ -537,6 +538,163 @@ public function crop( $args = [], $opts = [] ) {

}

/**
* Batch classify content using the OpenAI Embeddings API depending on passed-in settings.
*
* ## Options
*
* [<post_ids>]
* : Comma-delimited list of post IDs to classify
*
* [--post_type=<post_type>]
* : Batch process items belonging to this post type. If not used, relies on post_ids in args
*
* [--post_status=<post_status>]
* : Batch process items that have this post status. Default publish
*
* [--per_page=<int>]
* : How many items should be processed at a time. Default 100
*
* [--dry-run=<bool>]
* : Whether to run as a dry-run. Default true
*
* @param array $args Arguments.
* @param array $opts Options.
*/
public function embeddings( $args = [], $opts = [] ) {
	$defaults = [
		'post_type'   => false,
		'post_status' => 'publish',
		'per_page'    => 100,
	];

	$embeddings = new Embeddings( false );
	$opts       = wp_parse_args( $opts, $defaults );

	// Option values passed on the command line are strings; store the batch
	// size as a positive integer and fall back to 100 for anything else.
	$per_page         = (int) $opts['per_page'];
	$opts['per_page'] = $per_page > 0 ? $per_page : 100;

	$allowed_post_types  = $embeddings->supported_post_types();
	$allowed_post_status = $embeddings->supported_post_statuses();

	$count  = 0;
	$errors = 0;

	// Dry-run is the default. It is only switched off by the literal string
	// "false" or by a value that casts to boolean false (e.g. "0").
	$dry_run = true;
	if ( isset( $opts['dry-run'] ) ) {
		$dry_run = 'false' !== $opts['dry-run'] && (bool) $opts['dry-run'];
	}

	if ( $dry_run ) {
		\WP_CLI::line( '--- Running command in dry-run mode ---' );
	}

	if ( ! empty( $opts['post_type'] ) ) {
		// A post type was given: process every matching item in batches.
		// WP_CLI::error() halts the command unless its second argument is false.

		// Only allow processing post types that are enabled in settings.
		if ( ! in_array( $opts['post_type'], $allowed_post_types, true ) ) {
			\WP_CLI::error( sprintf( 'The "%s" post type is not enabled for OpenAI Embeddings processing', $opts['post_type'] ) );
		}

		// The status has to be both available for this post type and supported by the Embeddings provider.
		$status_is_valid = in_array( $opts['post_status'], get_available_post_statuses( $opts['post_type'] ), true )
			&& in_array( $opts['post_status'], $allowed_post_status, true );

		if ( ! $status_is_valid ) {
			\WP_CLI::error( sprintf( 'The "%s" post status is not valid for the "%s" post type', $opts['post_status'], $opts['post_type'] ) );
		}

		\WP_CLI::log( sprintf( 'Starting processing of "%s" post type items that have the "%s" status in batches of %d', $opts['post_type'], $opts['post_status'], $opts['per_page'] ) );

		$paged = 1;

		do {
			$posts = get_posts(
				[
					'post_type'        => $opts['post_type'],
					'posts_per_page'   => $opts['per_page'],
					'paged'            => $paged,
					'post_status'      => $opts['post_status'],
					// Must be the boolean false: the string 'false' is truthy and
					// would suppress query filters instead of enabling them.
					'suppress_filters' => false,
					'fields'           => 'ids',
				]
			);
			$total = count( $posts );

			foreach ( $posts as $post_id ) {
				if ( ! $dry_run ) {
					$result = $embeddings->generate_embeddings_for_post( $post_id );

					if ( is_wp_error( $result ) ) {
						\WP_CLI::error( sprintf( 'Error while processing item ID %s', $post_id ), false );
						$errors ++;
					}
				}

				$count ++;
			}

			$this->inmemory_cleanup();

			if ( $total ) {
				\WP_CLI::log( sprintf( 'Batch %d is done, proceeding to next batch', $paged ) );
			}

			$paged ++;
		} while ( $total ); // An empty page means every item has been visited.
	} else {
		// If no post type is specified, we have to have a list of post IDs.
		if ( ! isset( $args[0] ) ) {
			\WP_CLI::error( 'Please specify a comma-delimited list of post IDs to process' );
		}

		// Drop segments that do not yield a positive ID (empty segments from a
		// stray comma, non-numeric text). An ID of 0 must never reach
		// get_post(), which can fall back to the global post for an empty value.
		$post_ids = array_values( array_filter( array_map( 'absint', explode( ',', $args[0] ) ) ) );

		if ( empty( $post_ids ) ) {
			\WP_CLI::error( 'Please specify a comma-delimited list of post IDs to process' );
		}

		\WP_CLI::log( sprintf( 'Starting processing of %s items', count( $post_ids ) ) );

		$progress_bar = \WP_CLI\Utils\make_progress_bar( 'Processing ...', count( $post_ids ) );

		foreach ( $post_ids as $post_id ) {
			// Ensure we have a valid post ID.
			if ( ! get_post( $post_id ) ) {
				\WP_CLI::error( sprintf( 'Item ID %d does not exist', $post_id ), false );
				$errors ++;
				// Skipped items still advance the bar so it reflects every item examined.
				$progress_bar->tick();
				continue;
			}

			// Ensure we have a valid post type.
			$post_type = get_post_type( $post_id );
			if ( ! $post_type || ! in_array( $post_type, $allowed_post_types, true ) ) {
				\WP_CLI::error( sprintf( 'The "%s" post type is not enabled for OpenAI Embeddings processing', $post_type ), false );
				$errors ++;
				$progress_bar->tick();
				continue;
			}

			if ( ! $dry_run ) {
				$result = $embeddings->generate_embeddings_for_post( $post_id );

				if ( is_wp_error( $result ) ) {
					\WP_CLI::error( sprintf( 'Error while processing item ID %s', $post_id ), false );
					$errors ++;
				}
			}

			$progress_bar->tick();
			$count ++;
		}

		$progress_bar->finish();
	}

	if ( ! $dry_run ) {
		\WP_CLI::success( sprintf( '%d items have been processed', $count ) );
	} else {
		\WP_CLI::success( sprintf( '%d items would have been processed', $count ) );
	}

	\WP_CLI::log( sprintf( '%d items had errors', $errors ) );
}

/**
* Prints the Basic Auth header based on credentials configured in
* the plugin.
Expand Down
4 changes: 2 additions & 2 deletions includes/Classifai/Providers/OpenAI/Embeddings.php
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ public function generate_embeddings_for_post( $post_id ) {
}

// Ensure the user has permissions to edit.
if ( ! current_user_can( 'edit_post', $post_id ) ) {
if ( ! current_user_can( 'edit_post', $post_id ) && ( ! defined( 'WP_CLI' ) || ! WP_CLI ) ) {
return;
}

Expand Down Expand Up @@ -453,7 +453,7 @@ private function set_terms( int $post_id = 0, array $embedding = [] ) {

// Get embedding similarity for each term.
foreach ( $terms as $term_id ) {
if ( ! current_user_can( 'assign_term', $term_id ) ) {
if ( ! current_user_can( 'assign_term', $term_id ) && ( ! defined( 'WP_CLI' ) || ! WP_CLI ) ) {
continue;
}

Expand Down

0 comments on commit 81b6f9b

Please sign in to comment.