diff --git a/docs/components/platform.rst b/docs/components/platform.rst index 6bea67f72..e2f2b974a 100644 --- a/docs/components/platform.rst +++ b/docs/components/platform.rst @@ -102,6 +102,9 @@ Supported Models & Platforms * `LM Studio Catalog`_ and `HuggingFace`_ Models with `LM Studio`_ as Platform. * All models provided by `HuggingFace`_ can be listed with a command in the examples folder, and also filtered, e.g. ``php examples/huggingface/_model-listing.php --provider=hf-inference --task=object-detection`` +* **Voice Models** + * `Cartesia TTS` with `Cartesia`_ as Platform + * `Cartesia STT` with `Cartesia`_ as Platform Options ------- @@ -463,6 +466,7 @@ Code Examples .. _`Anthropic's Claude`: https://www.anthropic.com/claude .. _`Anthropic`: https://www.anthropic.com/ .. _`AWS Bedrock`: https://aws.amazon.com/bedrock/ +.. _`Cartesia`: https://cartesia.ai/sonic .. _`Meta's Llama`: https://www.llama.com/ .. _`Ollama`: https://ollama.com/ .. _`Replicate`: https://replicate.com/ diff --git a/examples/.env b/examples/.env index 5ec1b247d..3f3652496 100644 --- a/examples/.env +++ b/examples/.env @@ -52,6 +52,10 @@ OPENROUTER_KEY= ELEVEN_LABS_URL=https://api.elevenlabs.io/v1 ELEVEN_LABS_API_KEY= +# For using Cartesia +CARTESIA_API_KEY= +CARTESIA_API_VERSION=2025-04-16 + # For using SerpApi (tool) SERP_API_KEY= diff --git a/examples/cartesia/README.md b/examples/cartesia/README.md new file mode 100644 index 000000000..461f73f0b --- /dev/null +++ b/examples/cartesia/README.md @@ -0,0 +1,9 @@ +# Cartesia Examples + +One use case of Cartesia is to convert text to speech, which creates audio files from text input. 
+ +To run the examples, you can use additional tools like [mpg123](https://www.mpg123.de/): + +```bash +php cartesia/text-to-speech.php | mpg123 - +``` diff --git a/examples/cartesia/speech-to-text.php b/examples/cartesia/speech-to-text.php new file mode 100644 index 000000000..63a815e4f --- /dev/null +++ b/examples/cartesia/speech-to-text.php @@ -0,0 +1,25 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use Symfony\AI\Platform\Bridge\Cartesia\PlatformFactory; +use Symfony\AI\Platform\Message\Content\Audio; + +require_once dirname(__DIR__).'/bootstrap.php'; + +$platform = PlatformFactory::create( + apiKey: env('CARTESIA_API_KEY'), + version: env('CARTESIA_API_VERSION'), + httpClient: http_client(), +); + +$result = $platform->invoke('ink-whisper', Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3')); + +echo $result->asText().\PHP_EOL; diff --git a/examples/cartesia/text-to-speech.php b/examples/cartesia/text-to-speech.php new file mode 100644 index 000000000..b5c953d6f --- /dev/null +++ b/examples/cartesia/text-to-speech.php @@ -0,0 +1,32 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +use Symfony\AI\Platform\Bridge\Cartesia\PlatformFactory; +use Symfony\AI\Platform\Message\Content\Text; + +require_once dirname(__DIR__).'/bootstrap.php'; + +$platform = PlatformFactory::create( + apiKey: env('CARTESIA_API_KEY'), + version: env('CARTESIA_API_VERSION'), + httpClient: http_client(), +); + +$result = $platform->invoke('sonic-3', new Text('Hello world'), [ + 'voice' => '6ccbfb76-1fc6-48f7-b71d-91ac6298247b', // Tessa (https://play.cartesia.ai/voices/6ccbfb76-1fc6-48f7-b71d-91ac6298247b) + 'output_format' => [ + 'container' => 'mp3', + 'sample_rate' => 48000, + 'bit_rate' => 192000, + ], +]); + +echo $result->asBinary().\PHP_EOL; diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php index 635f764e8..39b34ee78 100644 --- a/src/ai-bundle/config/options.php +++ b/src/ai-bundle/config/options.php @@ -54,6 +54,16 @@ ->end() ->end() ->end() + ->arrayNode('cartesia') + ->children() + ->stringNode('api_key')->isRequired()->end() + ->stringNode('version')->isRequired()->end() + ->stringNode('http_client') + ->defaultValue('http_client') + ->info('Service ID of the HTTP client to use') + ->end() + ->end() + ->end() ->arrayNode('eleven_labs') ->children() ->stringNode('host')->end() diff --git a/src/ai-bundle/config/services.php b/src/ai-bundle/config/services.php index eace5fb61..b1135e82f 100644 --- a/src/ai-bundle/config/services.php +++ b/src/ai-bundle/config/services.php @@ -28,6 +28,7 @@ use Symfony\AI\Platform\Bridge\Anthropic\Contract\AnthropicContract; use Symfony\AI\Platform\Bridge\Anthropic\ModelCatalog as AnthropicModelCatalog; use Symfony\AI\Platform\Bridge\Anthropic\TokenOutputProcessor as AnthropicTokenOutputProcessor; +use Symfony\AI\Platform\Bridge\Cartesia\ModelCatalog as CartesiaModelCatalog; use Symfony\AI\Platform\Bridge\Cerebras\ModelCatalog as CerebrasModelCatalog; use Symfony\AI\Platform\Bridge\DeepSeek\ModelCatalog as DeepSeekModelCatalog; use Symfony\AI\Platform\Bridge\DockerModelRunner\ModelCatalog as 
DockerModelRunnerModelCatalog; @@ -85,6 +86,7 @@ // model catalog ->set('ai.platform.model_catalog.aimlapi', AiMlApiModelCatalog::class) ->set('ai.platform.model_catalog.anthropic', AnthropicModelCatalog::class) + ->set('ai.platform.model_catalog.cartesia', CartesiaModelCatalog::class) ->set('ai.platform.model_catalog.cerebras', CerebrasModelCatalog::class) ->set('ai.platform.model_catalog.deepseek', DeepSeekModelCatalog::class) ->set('ai.platform.model_catalog.dockermodelrunner', DockerModelRunnerModelCatalog::class) diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php index 25d6a3ea1..5cb45873d 100644 --- a/src/ai-bundle/src/AiBundle.php +++ b/src/ai-bundle/src/AiBundle.php @@ -41,6 +41,7 @@ use Symfony\AI\Chat\MessageStoreInterface; use Symfony\AI\Platform\Bridge\Anthropic\PlatformFactory as AnthropicPlatformFactory; use Symfony\AI\Platform\Bridge\Azure\OpenAi\PlatformFactory as AzureOpenAiPlatformFactory; +use Symfony\AI\Platform\Bridge\Cartesia\PlatformFactory as CartesiaPlatformFactory; use Symfony\AI\Platform\Bridge\Cerebras\PlatformFactory as CerebrasPlatformFactory; use Symfony\AI\Platform\Bridge\DeepSeek\PlatformFactory as DeepSeekPlatformFactory; use Symfony\AI\Platform\Bridge\DockerModelRunner\PlatformFactory as DockerModelRunnerPlatformFactory; @@ -293,6 +294,26 @@ private function processPlatformConfig(string $type, array $platform, ContainerB return; } + if ('cartesia' === $type) { + $definition = (new Definition(Platform::class)) + ->setFactory(CartesiaPlatformFactory::class.'::create') + ->setLazy(true) + ->addTag('proxy', ['interface' => PlatformInterface::class]) + ->setArguments([ + $platform['api_key'], + $platform['version'], + new Reference($platform['http_client'], ContainerInterface::NULL_ON_INVALID_REFERENCE), + new Reference('ai.platform.model_catalog.cartesia'), + null, + new Reference('event_dispatcher'), + ]) + ->addTag('ai.platform', ['name' => 'cartesia']); + + $container->setDefinition('ai.platform.cartesia', 
$definition); + + return; + } + if ('eleven_labs' === $type) { $platformId = 'ai.platform.eleven_labs'; $definition = (new Definition(Platform::class)) diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php index ba1a461c3..e965d2b27 100644 --- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php +++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php @@ -2795,6 +2795,11 @@ private function getFullConfig(): array 'api_version' => '2024-02-15-preview', ], ], + 'cartesia' => [ + 'api_key' => 'cartesia_key_full', + 'version' => '2025-04-16', + 'http_client' => 'http_client', + ], 'eleven_labs' => [ 'host' => 'https://api.elevenlabs.io/v1', 'api_key' => 'eleven_labs_key_full', diff --git a/src/platform/CHANGELOG.md b/src/platform/CHANGELOG.md index 1fc9c985f..3bf4557d3 100644 --- a/src/platform/CHANGELOG.md +++ b/src/platform/CHANGELOG.md @@ -27,6 +27,7 @@ CHANGELOG - AI/ML API (language models and embeddings) - Docker Model Runner (local model hosting) - Scaleway (language models like OpenAI OSS, Llama 4, Qwen 3, and more) + - Cartesia (voice model that supports both text-to-speech and speech-to-text) * Add comprehensive message system with role-based messaging: - `UserMessage` for user inputs with multi-modal content - `SystemMessage` for system instructions diff --git a/src/platform/src/Bridge/Cartesia/Cartesia.php b/src/platform/src/Bridge/Cartesia/Cartesia.php new file mode 100644 index 000000000..e0899db3b --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/Cartesia.php @@ -0,0 +1,21 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Bridge\Cartesia; + +use Symfony\AI\Platform\Model; + +/** + * @author Guillaume Loulier + */ +final class Cartesia extends Model +{ +} diff --git a/src/platform/src/Bridge/Cartesia/CartesiaClient.php b/src/platform/src/Bridge/Cartesia/CartesiaClient.php new file mode 100644 index 000000000..d5df536fe --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/CartesiaClient.php @@ -0,0 +1,91 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Bridge\Cartesia; + +use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\Exception\RuntimeException; +use Symfony\AI\Platform\Model; +use Symfony\AI\Platform\ModelClientInterface; +use Symfony\AI\Platform\Result\RawHttpResult; +use Symfony\AI\Platform\Result\RawResultInterface; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +/** + * @author Guillaume Loulier + */ +final class CartesiaClient implements ModelClientInterface +{ + public function __construct( + private readonly HttpClientInterface $httpClient, + #[\SensitiveParameter] private readonly string $apiKey, + private readonly string $version, + ) { + } + + public function supports(Model $model): bool + { + return $model instanceof Cartesia; + } + + public function request(Model $model, array|string $payload, array $options = []): RawResultInterface + { + return match (true) { + \in_array(Capability::TEXT_TO_SPEECH, $model->getCapabilities()) => $this->doTextToSpeech($model, $payload, $options), + \in_array(Capability::SPEECH_TO_TEXT, $model->getCapabilities()) => $this->doSpeechToText($model, $payload, $options), + default => throw new RuntimeException(\sprintf('The model "%s" is not supported.', $model->getName())), + }; + } + + /** + * @param array $payload + * @param array $options + */ + private function doTextToSpeech(Model $model, array|string $payload, array $options): RawHttpResult + { + 
return new RawHttpResult($this->httpClient->request('POST', 'https://api.cartesia.ai/tts/bytes', [ + 'auth_bearer' => $this->apiKey, + 'headers' => [ + 'Cartesia-Version' => $this->version, + ], + 'json' => [ + ...$options, + 'model_id' => $model->getName(), + 'transcript' => $payload['text'], + 'voice' => [ + 'mode' => 'id', + 'id' => $options['voice'], + ], + 'output_format' => $options['output_format'], + ], + ])); + } + + /** + * @param array $payload + * @param array $options + */ + private function doSpeechToText(Model $model, array|string $payload, array $options): RawHttpResult + { + return new RawHttpResult($this->httpClient->request('POST', 'https://api.cartesia.ai/stt', [ + 'auth_bearer' => $this->apiKey, + 'headers' => [ + 'Cartesia-Version' => $this->version, + ], + 'body' => [ + ...$options, + 'model' => $model->getName(), + 'file' => fopen($payload['input_audio']['path'], 'r'), + 'timestamp_granularities[]' => 'word', + ], + ])); + } +} diff --git a/src/platform/src/Bridge/Cartesia/CartesiaResultConverter.php b/src/platform/src/Bridge/Cartesia/CartesiaResultConverter.php new file mode 100644 index 000000000..6248ea8b2 --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/CartesiaResultConverter.php @@ -0,0 +1,44 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Bridge\Cartesia; + +use Symfony\AI\Platform\Exception\RuntimeException; +use Symfony\AI\Platform\Model; +use Symfony\AI\Platform\Result\BinaryResult; +use Symfony\AI\Platform\Result\RawResultInterface; +use Symfony\AI\Platform\Result\ResultInterface; +use Symfony\AI\Platform\Result\TextResult; +use Symfony\AI\Platform\ResultConverterInterface; +use Symfony\Contracts\HttpClient\ResponseInterface; + +/** + * @author Guillaume Loulier + */ +final class CartesiaResultConverter implements ResultConverterInterface +{ + public function supports(Model $model): bool + { + return $model instanceof Cartesia; + } + + public function convert(RawResultInterface $result, array $options = []): ResultInterface + { + /** @var ResponseInterface $response */ + $response = $result->getObject(); + + return match (true) { + str_contains($response->getInfo('url'), 'stt') => new TextResult($result->getData()['text']), + str_contains($response->getInfo('url'), 'tts') => new BinaryResult($result->getObject()->getContent(), 'audio/mpeg'), + default => throw new RuntimeException('Unsupported Cartesia response.'), + }; + } +} diff --git a/src/platform/src/Bridge/Cartesia/Contract/AudioNormalizer.php b/src/platform/src/Bridge/Cartesia/Contract/AudioNormalizer.php new file mode 100644 index 000000000..15a87d04d --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/Contract/AudioNormalizer.php @@ -0,0 +1,58 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Bridge\Cartesia\Contract; + +use Symfony\AI\Platform\Message\Content\Audio; +use Symfony\Component\Serializer\Normalizer\NormalizerInterface; + +/** + * @author Guillaume Loulier + */ +final class AudioNormalizer implements NormalizerInterface +{ + /** + * @param Audio $data + * + * @return array{type: 'input_audio', input_audio: array{ + * data: string, + * path: string, + * format: 'mp3'|'wav'|string, + * }} + */ + public function normalize(mixed $data, ?string $format = null, array $context = []): array + { + return [ + 'type' => 'input_audio', + 'input_audio' => [ + 'data' => $data->asBase64(), + 'path' => $data->asPath(), + 'format' => match ($data->getFormat()) { + 'audio/mpeg' => 'mp3', + 'audio/wav' => 'wav', + default => $data->getFormat(), + }, + ], + ]; + } + + public function supportsNormalization(mixed $data, ?string $format = null, array $context = []): bool + { + return $data instanceof Audio; + } + + public function getSupportedTypes(?string $format): array + { + return [ + Audio::class => true, + ]; + } +} diff --git a/src/platform/src/Bridge/Cartesia/Contract/CartesiaContract.php b/src/platform/src/Bridge/Cartesia/Contract/CartesiaContract.php new file mode 100644 index 000000000..6f1da9719 --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/Contract/CartesiaContract.php @@ -0,0 +1,29 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Bridge\Cartesia\Contract; + +use Symfony\AI\Platform\Contract; +use Symfony\Component\Serializer\Normalizer\NormalizerInterface; + +/** + * @author Guillaume Loulier + */ +final class CartesiaContract extends Contract +{ + public static function create(NormalizerInterface ...$normalizer): Contract + { + return parent::create( + new AudioNormalizer(), + ...$normalizer, + ); + } +} diff --git a/src/platform/src/Bridge/Cartesia/ModelCatalog.php b/src/platform/src/Bridge/Cartesia/ModelCatalog.php new file mode 100644 index 000000000..00413e281 --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/ModelCatalog.php @@ -0,0 +1,47 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Bridge\Cartesia; + +use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\ModelCatalog\AbstractModelCatalog; + +/** + * @author Guillaume Loulier + */ +final class ModelCatalog extends AbstractModelCatalog +{ + /** + * @param array}> $additionalModels + */ + public function __construct(array $additionalModels = []) + { + $defaultModels = [ + 'sonic-3' => [ + 'class' => Cartesia::class, + 'capabilities' => [ + Capability::TEXT_TO_SPEECH, + ], + ], + 'ink-whisper' => [ + 'class' => Cartesia::class, + 'capabilities' => [ + Capability::SPEECH_TO_TEXT, + ], + ], + ]; + + $this->models = [ + ...$defaultModels, + ...$additionalModels, + ]; + } +} diff --git a/src/platform/src/Bridge/Cartesia/PlatformFactory.php b/src/platform/src/Bridge/Cartesia/PlatformFactory.php new file mode 100644 index 000000000..8bb80aaba --- /dev/null +++ b/src/platform/src/Bridge/Cartesia/PlatformFactory.php @@ -0,0 +1,45 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Bridge\Cartesia; + +use Psr\EventDispatcher\EventDispatcherInterface; +use Symfony\AI\Platform\Bridge\Cartesia\Contract\CartesiaContract; +use Symfony\AI\Platform\Contract; +use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; +use Symfony\AI\Platform\Platform; +use Symfony\Component\HttpClient\EventSourceHttpClient; +use Symfony\Contracts\HttpClient\HttpClientInterface; + +/** + * @author Guillaume Loulier + */ +final class PlatformFactory +{ + public static function create( + string $apiKey, + string $version, + ?HttpClientInterface $httpClient = null, + ModelCatalogInterface $modelCatalog = new ModelCatalog(), + ?Contract $contract = null, + ?EventDispatcherInterface $eventDispatcher = null, + ): Platform { + $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient); + + return new Platform( + [new CartesiaClient($httpClient, $apiKey, $version)], + [new CartesiaResultConverter()], + $modelCatalog, + $contract ?? CartesiaContract::create(), + $eventDispatcher, + ); + } +} diff --git a/src/platform/tests/Bridge/Cartesia/CartesiaClientTest.php b/src/platform/tests/Bridge/Cartesia/CartesiaClientTest.php new file mode 100644 index 000000000..a18570352 --- /dev/null +++ b/src/platform/tests/Bridge/Cartesia/CartesiaClientTest.php @@ -0,0 +1,164 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Tests\Bridge\Cartesia; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Bridge\Cartesia\Cartesia; +use Symfony\AI\Platform\Bridge\Cartesia\CartesiaClient; +use Symfony\AI\Platform\Bridge\Cartesia\Contract\AudioNormalizer; +use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\Exception\RuntimeException; +use Symfony\AI\Platform\Message\Content\Audio; +use Symfony\AI\Platform\Model; +use Symfony\Component\HttpClient\Exception\ClientException; +use Symfony\Component\HttpClient\MockHttpClient; +use Symfony\Component\HttpClient\Response\JsonMockResponse; +use Symfony\Component\HttpClient\Response\MockResponse; + +final class CartesiaClientTest extends TestCase +{ + public function testSupportsModel() + { + $client = new CartesiaClient( + new MockHttpClient(), + 'my-api-key', + 'foo', + ); + + $this->assertTrue($client->supports(new Cartesia('sonic-3'))); + $this->assertFalse($client->supports(new Model('any-model'))); + } + + public function testClientCannotPerformOnInvalidModel() + { + $client = new CartesiaClient( + new MockHttpClient(), + 'my-api-key', + 'foo', + ); + + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage('The model "foo" is not supported.'); + $this->expectExceptionCode(0); + $client->request(new Cartesia('foo', []), [ + 'text' => 'bar', + ]); + } + + public function testClientCannotPerformTextToSpeechOnInvalidResponse() + { + $httpClient = new MockHttpClient([ + new JsonMockResponse([ + 'error' => '', + ], [ + 'http_code' => 400, + ]), + ]); + + $client = new CartesiaClient( + $httpClient, + 'my-api-key', + 'foo', + ); + + $this->expectException(ClientException::class); + $this->expectExceptionMessage('HTTP 400 returned for "https://api.cartesia.ai/tts/bytes".'); + $this->expectExceptionCode(400); + $client->request(new Cartesia('sonic-3', [Capability::TEXT_TO_SPEECH]), [ + 'text' => 'bar', + ], [ + 'voice' => '6ccbfb76-1fc6-48f7-b71d-91ac6298247b', // Tessa 
(https://play.cartesia.ai/voices/6ccbfb76-1fc6-48f7-b71d-91ac6298247b) + 'output_format' => [ + 'container' => 'mp3', + 'sample_rate' => 48000, + 'bit_rate' => 192000, + ], + ]); + } + + public function testClientCanPerformTextToSpeech() + { + $payload = Audio::fromFile(\dirname(__DIR__, 5).'/fixtures/audio.mp3'); + + $httpClient = new MockHttpClient([ + new MockResponse($payload->asBinary()), + ]); + + $client = new CartesiaClient( + $httpClient, + 'my-api-key', + 'foo', + ); + + $client->request(new Cartesia('sonic-3', [Capability::TEXT_TO_SPEECH]), [ + 'text' => 'bar', + ], [ + 'voice' => '6ccbfb76-1fc6-48f7-b71d-91ac6298247b', // Tessa (https://play.cartesia.ai/voices/6ccbfb76-1fc6-48f7-b71d-91ac6298247b) + 'output_format' => [ + 'container' => 'mp3', + 'sample_rate' => 48000, + 'bit_rate' => 192000, + ], + ]); + + $this->assertSame(1, $httpClient->getRequestsCount()); + } + + public function testClientCannotPerformSpeechToTextOnInvalidResponse() + { + $payload = Audio::fromFile(\dirname(__DIR__, 2).'/fixtures/audio.mp3'); + + $normalizer = new AudioNormalizer(); + + $httpClient = new MockHttpClient([ + new JsonMockResponse([ + 'error' => '', + ], [ + 'http_code' => 400, + ]), + ]); + + $client = new CartesiaClient( + $httpClient, + 'my-api-key', + 'foo', + ); + + $this->expectException(ClientException::class); + $this->expectExceptionMessage('HTTP 400 returned for "https://api.cartesia.ai/stt".'); + $this->expectExceptionCode(400); + $client->request(new Cartesia('ink-whisper', [Capability::SPEECH_TO_TEXT]), $normalizer->normalize($payload)); + } + + public function testClientCanPerformSpeechToText() + { + $payload = Audio::fromFile(\dirname(__DIR__, 2).'/fixtures/audio.mp3'); + + $normalizer = new AudioNormalizer(); + + $httpClient = new MockHttpClient([ + new JsonMockResponse([ + 'text' => 'Hello there', + ]), + ]); + + $client = new CartesiaClient( + $httpClient, + 'my-api-key', + 'foo', + ); + + $client->request(new Cartesia('ink-whisper', 
[Capability::SPEECH_TO_TEXT]), $normalizer->normalize($payload)); + + $this->assertSame(1, $httpClient->getRequestsCount()); + } +} diff --git a/src/platform/tests/Bridge/Cartesia/CartesiaResultConverterTest.php b/src/platform/tests/Bridge/Cartesia/CartesiaResultConverterTest.php new file mode 100644 index 000000000..c1ea9917d --- /dev/null +++ b/src/platform/tests/Bridge/Cartesia/CartesiaResultConverterTest.php @@ -0,0 +1,70 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Tests\Bridge\Cartesia; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Bridge\Cartesia\Cartesia; +use Symfony\AI\Platform\Bridge\Cartesia\CartesiaResultConverter; +use Symfony\AI\Platform\Model; +use Symfony\AI\Platform\Result\BinaryResult; +use Symfony\AI\Platform\Result\InMemoryRawResult; +use Symfony\AI\Platform\Result\TextResult; + +final class CartesiaResultConverterTest extends TestCase +{ + public function testSupportsModel() + { + $converter = new CartesiaResultConverter(); + + $this->assertTrue($converter->supports(new Cartesia('sonic-3'))); + $this->assertFalse($converter->supports(new Model('any-model'))); + } + + public function testConvertSpeechToTextResponse() + { + $converter = new CartesiaResultConverter(); + $rawResult = new InMemoryRawResult([ + 'text' => 'Hello there', + ], [], new class { + public function getInfo(): string + { + return 'stt'; + } + }); + + $result = $converter->convert($rawResult); + + $this->assertInstanceOf(TextResult::class, $result); + $this->assertSame('Hello there', $result->getContent()); + } + + public function testConvertTextToSpeechResponse() + { + $converter = new CartesiaResultConverter(); + $rawResult = new InMemoryRawResult([], [], new class { + public function getInfo(): string + { + return 'tts'; + } + + public function getContent(): string + { + return file_get_contents(\dirname(__DIR__, 
5).'/fixtures/audio.mp3'); + } + }); + + $result = $converter->convert($rawResult); + + $this->assertInstanceOf(BinaryResult::class, $result); + $this->assertSame('audio/mpeg', $result->getMimeType()); + } +} diff --git a/src/platform/tests/Bridge/Cartesia/Contract/CartesiaContractTest.php b/src/platform/tests/Bridge/Cartesia/Contract/CartesiaContractTest.php new file mode 100644 index 000000000..ce691da92 --- /dev/null +++ b/src/platform/tests/Bridge/Cartesia/Contract/CartesiaContractTest.php @@ -0,0 +1,38 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\AI\Platform\Tests\Bridge\Cartesia\Contract; + +use PHPUnit\Framework\TestCase; +use Symfony\AI\Platform\Bridge\Cartesia\Cartesia; +use Symfony\AI\Platform\Bridge\Cartesia\Contract\CartesiaContract; +use Symfony\AI\Platform\Message\Content\Audio; + +final class CartesiaContractTest extends TestCase +{ + public function testItCanCreatePayloadWithAudio() + { + $audio = Audio::fromFile(\dirname(__DIR__, 6).'/fixtures/audio.mp3'); + + $contract = CartesiaContract::create(); + + $payload = $contract->createRequestPayload(new Cartesia('ink-whisper'), $audio); + + $this->assertSame([ + 'type' => 'input_audio', + 'input_audio' => [ + 'data' => $audio->asBase64(), + 'path' => $audio->asPath(), + 'format' => 'mp3', + ], + ], $payload); + } +} diff --git a/src/platform/tests/Bridge/Cartesia/ModelCatalogTest.php b/src/platform/tests/Bridge/Cartesia/ModelCatalogTest.php new file mode 100644 index 000000000..80930d525 --- /dev/null +++ b/src/platform/tests/Bridge/Cartesia/ModelCatalogTest.php @@ -0,0 +1,32 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\AI\Platform\Tests\Bridge\Cartesia; + +use Symfony\AI\Platform\Bridge\Cartesia\Cartesia; +use Symfony\AI\Platform\Bridge\Cartesia\ModelCatalog; +use Symfony\AI\Platform\Capability; +use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface; +use Symfony\AI\Platform\Test\ModelCatalogTestCase; + +final class ModelCatalogTest extends ModelCatalogTestCase +{ + public static function modelsProvider(): iterable + { + yield 'sonic-3' => ['sonic-3', Cartesia::class, [Capability::TEXT_TO_SPEECH]]; + yield 'ink-whisper' => ['ink-whisper', Cartesia::class, [Capability::SPEECH_TO_TEXT]]; + } + + protected function createModelCatalog(): ModelCatalogInterface + { + return new ModelCatalog(); + } +}