From bf68d14535b12cf0cecc3364be7627ac919a2a5e Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Mon, 22 Sep 2025 08:18:51 +0000 Subject: [PATCH 1/2] upload with xet by default --- packages/hub/src/lib/commit.ts | 37 +++++++++--------------- packages/hub/src/types/api/api-commit.ts | 4 +-- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/packages/hub/src/lib/commit.ts b/packages/hub/src/lib/commit.ts index e36be4965c..f07689c19e 100644 --- a/packages/hub/src/lib/commit.ts +++ b/packages/hub/src/lib/commit.ts @@ -120,8 +120,13 @@ export type CommitParams = { */ fetch?: typeof fetch; abortSignal?: AbortSignal; - // Credentials are optional due to custom fetch functions or cookie auth + /** + * @default true + * + * Use xet protocol: https://huggingface.co/blog/xet-on-the-hub to upload, rather than a basic S3 PUT + */ useXet?: boolean; + // Credentials are optional due to custom fetch functions or cookie auth } & Partial; export interface CommitOutput { @@ -165,24 +170,7 @@ export async function* commitIter(params: CommitParams): AsyncGenerator(); @@ -206,10 +194,6 @@ export async function* commitIter(params: CommitParams): AsyncGenerator { - if (operation.operation === "edit" && !useXet) { - throw new Error("Edit operation is not supported when Xet is disabled"); - } - if (operation.operation === "edit") { // Convert EditFile operation to a file operation with SplicedBlob const splicedBlob = SplicedBlob.create( @@ -325,7 +309,7 @@ export async function* commitIter(params: CommitParams): AsyncGenerator [shas[i], op])); + if (useXet && json.transfer !== "xet") { + useXet = false; + } + if (useXet) { // First get all the files that are already uploaded out of the way for (const obj of json.objects) { @@ -396,6 +384,7 @@ export async function* commitIter(params: CommitParams): AsyncGenerator; /** * Optional object describing the server ref that the objects belong to. Note: Added in v2.4. * @@ -29,7 +29,7 @@ export interface ApiLfsBatchResponse { objects: ApiLfsResponseObject[]; } -export type ApiLfsResponseTransfer = "basic" | "multipart"; +export type ApiLfsResponseTransfer = "basic" | "multipart" | "xet"; export interface ApiLfsCompleteMultipartRequest { oid: string; From daf989f872d75283aa7b0a6bdddaac8b502499e8 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 31 Oct 2025 16:31:39 +0100 Subject: [PATCH 2/2] extract data from git lfs resp --- packages/hub/src/lib/commit.ts | 86 +++++++++++++++---------- packages/hub/src/utils/uploadShards.ts | 4 ++ packages/hub/src/utils/xetWriteToken.ts | 8 ++- 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/packages/hub/src/lib/commit.ts b/packages/hub/src/lib/commit.ts index f07689c19e..5d69be5e7f 100644 --- a/packages/hub/src/lib/commit.ts +++ b/packages/hub/src/lib/commit.ts @@ -350,6 +350,8 @@ export async function* commitIter(params: CommitParams): AsyncGenerator - Promise.all( - sources.map(async function (source) { - for await (const event of uploadShards(source, { - fetch: params.fetch, - accessToken, - hubUrl: params.hubUrl ?? HUB_URL, - repo: repoId, - // todo: maybe leave empty if PR? - rev: params.branch ?? "main", - isPullRequest: params.isPullRequest, - yieldCallback: (event) => yieldCallback({ ...event, state: "uploading" }), - })) { - if (event.event === "file") { - yieldCallback({ - event: "fileProgress" as const, - path: event.path, - progress: 1, - state: "uploading" as const, - }); - } else if (event.event === "fileProgress") { - yieldCallback({ - event: "fileProgress" as const, - path: event.path, - progress: event.progress, - state: "uploading" as const, - }); + if (xetRefreshWriteTokenUrl) { + const xetRefreshWriteTokenUrlFixed = xetRefreshWriteTokenUrl; + const sources = splitAsyncGenerator(source, 5); + yield* eventToGenerator((yieldCallback, returnCallback, rejectCallback) => + Promise.all( + sources.map(async function (source) { + for await (const event of uploadShards(source, { + fetch: params.fetch, + accessToken, + hubUrl: params.hubUrl ?? HUB_URL, + repo: repoId, + xetRefreshWriteTokenUrl: xetRefreshWriteTokenUrlFixed, + xetSessionId, + // todo: maybe leave empty if PR? + rev: params.branch ?? "main", + isPullRequest: params.isPullRequest, + yieldCallback: (event) => yieldCallback({ ...event, state: "uploading" }), + })) { + if (event.event === "file") { + yieldCallback({ + event: "fileProgress" as const, + path: event.path, + progress: 1, + state: "uploading" as const, + }); + } else if (event.event === "fileProgress") { + yieldCallback({ + event: "fileProgress" as const, + path: event.path, + progress: event.progress, + state: "uploading" as const, + }); + } } - } - }) - ).then(() => returnCallback(undefined), rejectCallback) - ); + }) + ).then(() => returnCallback(undefined), rejectCallback) + ); + } else { + // No LFS file to upload + } } else { yield* eventToGenerator((yieldCallback, returnCallback, rejectCallback) => { return promisesQueueStreaming( diff --git a/packages/hub/src/utils/uploadShards.ts b/packages/hub/src/utils/uploadShards.ts index cc244fe5c7..66205b82e8 100644 --- a/packages/hub/src/utils/uploadShards.ts +++ b/packages/hub/src/utils/uploadShards.ts @@ -54,6 +54,8 @@ export const SHARD_MAGIC_TAG = new Uint8Array([ interface UploadShardsParams { accessToken: string | undefined; hubUrl: string; + xetRefreshWriteTokenUrl: string; + xetSessionId: string | undefined; fetch?: typeof fetch; repo: RepoId; rev: string; @@ -365,6 +367,7 @@ async function uploadXorb( body: xorb.xorb, headers: { Authorization: `Bearer ${token.accessToken}`, + ...(params.xetSessionId ? { "X-Xet-Session-Id": params.xetSessionId } : {}), }, ...{ progressHint: { @@ -394,6 +397,7 @@ async function uploadShard(shard: Uint8Array, params: UploadShardsParams) { body: shard, headers: { Authorization: `Bearer ${token.accessToken}`, + ...(params.xetSessionId ? { "X-Xet-Session-Id": params.xetSessionId } : {}), }, }); diff --git a/packages/hub/src/utils/xetWriteToken.ts b/packages/hub/src/utils/xetWriteToken.ts index dbb37b61bd..e06e2a078c 100644 --- a/packages/hub/src/utils/xetWriteToken.ts +++ b/packages/hub/src/utils/xetWriteToken.ts @@ -8,6 +8,7 @@ export interface XetWriteTokenParams { repo: RepoId; rev: string; isPullRequest?: boolean; + xetRefreshWriteTokenUrl: string | undefined; } const JWT_SAFETY_PERIOD = 60_000; @@ -47,9 +48,10 @@ export async function xetWriteToken(params: XetWriteTokenParams): Promise<{ acce const promise = (async () => { const resp = await (params.fetch ?? fetch)( - `${params.hubUrl}/api/${params.repo.type}s/${params.repo.name}/xet-write-token/${encodeURIComponent( - params.rev - )}` + (params.isPullRequest ? "?create_pr=1" : ""), + params.xetRefreshWriteTokenUrl ?? + `${params.hubUrl}/api/${params.repo.type}s/${params.repo.name}/xet-write-token/${encodeURIComponent( + params.rev + )}` + (params.isPullRequest ? "?create_pr=1" : ""), { headers: params.accessToken ? {