@@ -46,8 +46,7 @@ Sync Usage
     client = Client(endpoint="https://<MD_OCID>/predict")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)
 
@@ -58,7 +57,7 @@ Sync Usage
     from ads.aqua import Client
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = Client(endpoint="https://<MD_OCID>/predict")
+    client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],
         payload={"model": "odsc-llm"},
@@ -97,8 +96,7 @@ The following examples demonstrate how to perform the same operations using the
     client = AsyncClient(endpoint="https://<MD_OCID>/predict")
     response = await client.generate(
         prompt="Tell me a joke",
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)
 
@@ -109,7 +107,7 @@ The following examples demonstrate how to perform the same operations using the
     from ads.aqua import AsyncClient
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = AsyncClient(endpoint="https://<MD_OCID>/predict")
+    client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
     async for chunk in await client.generate(
         prompt="Tell me a joke",
         payload={"model": "odsc-llm"},
@@ -225,11 +223,33 @@ The synchronous client, ``OpenAI``, extends the OpenAI client. If no HTTP client
225223 " content" : " Tell me a joke." ,
226224 }
227225 ],
228- # stream=True, # enable for streaming
229226 )
230227
231228 print (response)
232229
230+ **Streaming **
231+ For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream ``.
232+
233+ .. code-block :: python
234+
235+ client = OpenAI(
236+ base_url = " https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1" ,
237+ )
238+
239+ response = client.chat.completions.create(
240+ model = " odsc-llm" ,
241+ messages = [
242+ {
243+ " role" : " user" ,
244+ " content" : " Tell me a joke." ,
245+ }
246+ ],
247+ stream = True
248+ )
249+
250+ for chunk in response:
251+ print (chunk)
252+
233253
234254 **Asynchronous Client **
235255
@@ -246,7 +266,7 @@ The asynchronous client, ``AsynOpenAI``, extends the AsyncOpenAI client. If no a
 
     async def test_async() -> None:
         client_async = AsyncOpenAI(
-            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predict/v1",
+            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
         )
         response = await client_async.chat.completions.create(
             model="odsc-llm",
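
For context, a minimal sketch of how such a streaming response is typically consumed on the async side, mirroring the synchronous streaming example above. It assumes the ``client_async`` instance and ``odsc-llm`` model name from the snippet above and runs inside an ``async`` function such as ``test_async``; the rest is illustrative rather than part of the change:

.. code-block:: python

    response = await client_async.chat.completions.create(
        model="odsc-llm",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stream=True,
    )
    # Each chunk is a partial completion; print them as they arrive.
    async for chunk in response:
        print(chunk)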