From 640b56ca32f0eeed6f710f9f8721215bfa61637b Mon Sep 17 00:00:00 2001 From: Juan Dominguez Date: Mon, 27 Oct 2025 10:58:06 -0300 Subject: [PATCH] feat(genai): add local tokenizer samples --- ...untTokensLocalTokenizerComputeWithTxt.java | 68 +++++++++++++++++++ .../CountTokensLocalTokenizerWithTxt.java | 43 ++++++++++++ .../java/genai/counttokens/CountTokensIT.java | 30 +++++++- 3 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerComputeWithTxt.java create mode 100644 genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerWithTxt.java diff --git a/genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerComputeWithTxt.java b/genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerComputeWithTxt.java new file mode 100644 index 00000000000..324f99dbca5 --- /dev/null +++ b/genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerComputeWithTxt.java @@ -0,0 +1,68 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package genai.counttokens; + +// [START googlegenaisdk_counttoken_localtokenizer_compute_with_txt] + +import com.google.genai.LocalTokenizer; +import com.google.genai.types.ComputeTokensResult; +import com.google.genai.types.TokensInfo; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +public class CountTokensLocalTokenizerComputeWithTxt { + + public static void main(String[] args) { + // TODO(developer): Replace these variables before running the sample. + String modelId = "gemini-2.5-flash"; + computeTokens(modelId); + } + + // Computes tokens with Local Tokenizer and a text input + public static Optional<List<TokensInfo>> computeTokens(String modelId) { + LocalTokenizer tokenizer = new LocalTokenizer(modelId); + ComputeTokensResult result = + tokenizer.computeTokens("What's the longest word in the English language?"); + + result.tokensInfo().ifPresent(tokensInfoList -> { + for (TokensInfo info : tokensInfoList) { + info.role().ifPresent(role -> System.out.println("role: " + role)); + info.tokenIds().ifPresent(tokenIds -> System.out.println("tokenIds: " + tokenIds)); + // Print tokens input as strings since they are in a form of byte array. + System.out.println("tokens: "); + info.tokens().ifPresent(tokens -> + tokens.forEach(token -> + System.out.println(new String(token, StandardCharsets.UTF_8)) + ) + ); + } + }); + // Example response: + // role: user + // tokenIds: [3689, 236789, 236751, 506, 27801, 3658, 528, 506, 5422, 5192, 236881] + // tokens: + // What + // ' + // s + // the + // longest + // ... 
+ return result.tokensInfo(); + } +} +// [END googlegenaisdk_counttoken_localtokenizer_compute_with_txt] diff --git a/genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerWithTxt.java b/genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerWithTxt.java new file mode 100644 index 00000000000..4226620a7fa --- /dev/null +++ b/genai/snippets/src/main/java/genai/counttokens/CountTokensLocalTokenizerWithTxt.java @@ -0,0 +1,43 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package genai.counttokens; + +// [START googlegenaisdk_counttoken_localtokenizer_with_txt] + +import com.google.genai.LocalTokenizer; +import com.google.genai.types.CountTokensResult; +import java.util.Optional; + +public class CountTokensLocalTokenizerWithTxt { + + public static void main(String[] args) { + // TODO(developer): Replace these variables before running the sample. 
+ String modelId = "gemini-2.5-flash"; + countTokens(modelId); + } + + // Counts tokens with Local Tokenizer and a text input + public static Optional<Integer> countTokens(String modelId) { + LocalTokenizer tokenizer = new LocalTokenizer(modelId); + CountTokensResult result = tokenizer.countTokens("What's the highest mountain in Africa?"); + System.out.println(result.totalTokens()); + // Example response: + // Optional[9] + return result.totalTokens(); + } +} +// [END googlegenaisdk_counttoken_localtokenizer_with_txt] diff --git a/genai/snippets/src/test/java/genai/counttokens/CountTokensIT.java b/genai/snippets/src/test/java/genai/counttokens/CountTokensIT.java index 7943c11cbe4..ea70d2dc4b0 100644 --- a/genai/snippets/src/test/java/genai/counttokens/CountTokensIT.java +++ b/genai/snippets/src/test/java/genai/counttokens/CountTokensIT.java @@ -82,7 +82,7 @@ public void testCountTokensWithTextAndVideo() { public void testCountTokensComputeWithText() { List<TokensInfo> response = - CountTokensComputeWithText.computeTokens(GEMINI_FLASH).orElse(new ArrayList<>()); + CountTokensComputeWithText.computeTokens(GEMINI_FLASH).orElse(new ArrayList<>()); assertThat(response).isNotEmpty(); TokensInfo tokensInfo = response.get(0); @@ -94,20 +94,44 @@ public void testCountTokensComputeWithText() { assertThat(tokensInfo.tokens()).isPresent(); assertThat(tokensInfo.tokens().get()).isNotEmpty(); + } + + @Test + public void testCountTokensLocalTokenizerComputeWithTxt() { + List<TokensInfo> response = + CountTokensLocalTokenizerComputeWithTxt.computeTokens(GEMINI_FLASH) + .orElse(new ArrayList<>()); + + assertThat(response).isNotEmpty(); + TokensInfo tokensInfo = response.get(0); + + assertThat(tokensInfo.role()).isPresent(); + + assertThat(tokensInfo.tokenIds()).isPresent(); + assertThat(tokensInfo.tokenIds().get()).isNotEmpty(); + + assertThat(tokensInfo.tokens()).isPresent(); + assertThat(tokensInfo.tokens().get()).isNotEmpty(); + } + + @Test + public void testCountTokensLocalTokenizerWithTxt() { + Optional<Integer> 
response = CountTokensLocalTokenizerWithTxt.countTokens(GEMINI_FLASH); + assertThat(response).isPresent(); + assertThat(response.get()).isGreaterThan(0); } @Test public void testCountTokensResponseWithText() { Optional response = - CountTokensResponseWithText.countTokens(GEMINI_FLASH); + CountTokensResponseWithText.countTokens(GEMINI_FLASH); assertThat(response).isPresent(); assertThat(response.get().totalTokenCount()).isPresent(); assertThat(response.get().totalTokenCount().get()).isGreaterThan(0); assertThat(response.get().promptTokenCount()).isPresent(); assertThat(response.get().promptTokenCount().get()).isGreaterThan(0); - } }