Skip to content

Commit 85e9e98

Browse files
committed
support models
1 parent 4fb5188 commit 85e9e98

File tree

2 files changed

+35
-27
lines changed

2 files changed

+35
-27
lines changed

src/lib/sources/huggingFaceSource.ts

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import { getFileName } from './utils.js'
44

55
export const baseUrl = 'https://huggingface.co'
66

7+
/**
 * Builds the repository's full name as used in Hugging Face URL paths.
 *
 * @param url - Parsed Hugging Face URL; only `type` and `repo` are read.
 * @returns `datasets/<repo>` when `url.type` is `'dataset'`,
 *   `spaces/<repo>` when it is `'space'`, and `url.repo` unchanged for any
 *   other type (no prefix is added).
 */
function getFullName(url: HFUrl): RepoFullName {
8+
return url.type === 'dataset' ? `datasets/${url.repo}` : url.type === 'space' ? `spaces/${url.repo}` : url.repo
9+
}
710
function getSourceParts(url: HFUrl): SourcePart[] {
811
const fullName = getFullName(url)
912
const sourceParts: SourcePart[] = [{
@@ -30,9 +33,6 @@ function getSourceParts(url: HFUrl): SourcePart[] {
3033
/**
 * Builds the `tree` URL prefix for a directory:
 * `<origin>/<full repo name>/tree/<branch><path>`.
 *
 * @param url - Directory URL; `origin`, `branch` and `path` are read directly,
 *   and the full repo name comes from `getFullName(url)`.
 * @returns The prefix with a single trailing `/` removed, if present.
 */
function getPrefix(url: DirectoryUrl): string {
3134
return `${url.origin}/${getFullName(url)}/tree/${url.branch}${url.path}`.replace(/\/$/, '')
3235
}
33-
function getFullName(url: HFUrl): RepoFullName {
34-
return url.type === 'dataset' ? `datasets/${url.repo}` : url.type === 'space' ? `spaces/${url.repo}` : url.repo
35-
}
3636
async function fetchFilesList(url: DirectoryUrl, options?: {requestInit?: RequestInit, accessToken?: string}): Promise<FileMetadata[]> {
3737
const filesIterator = listFiles({
3838
repo: {
@@ -141,15 +141,25 @@ export function parseHuggingFaceUrl(url: string): HFUrl {
141141
throw new Error('Not a Hugging Face URL')
142142
}
143143

144-
const repoGroups = /^(?<type>\/datasets|\/spaces)\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/?$/.exec(
145-
urlObject.pathname
144+
let { pathname } = urlObject
145+
let type: RepoType = 'model'
146+
if (pathname.startsWith('/datasets')) {
147+
type = 'dataset'
148+
pathname = pathname.slice('/datasets'.length)
149+
} else if (pathname.startsWith('/spaces')) {
150+
type = 'space'
151+
pathname = pathname.slice('/spaces'.length)
152+
}
153+
154+
const repoGroups = /^\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/?$/.exec(
155+
pathname
146156
)?.groups
147-
if (repoGroups?.type !== undefined && repoGroups.namespace !== undefined && repoGroups.repo !== undefined) {
157+
if (repoGroups?.namespace !== undefined && repoGroups.repo !== undefined) {
148158
return {
149159
kind: 'directory',
150160
source: url,
151161
origin: urlObject.origin,
152-
type: parseRepoType(repoGroups.type.slice(1)),
162+
type,
153163
repo: repoGroups.namespace + '/' + repoGroups.repo,
154164
action: 'tree',
155165
branch: 'main', // hardcode the default branch
@@ -158,25 +168,25 @@ export function parseHuggingFaceUrl(url: string): HFUrl {
158168
}
159169

160170
const folderGroups =
161-
/^(?<type>\/datasets|\/spaces)\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/(?<action>tree)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)*)\/?$/.exec(
162-
urlObject.pathname
171+
/^\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/(?<action>tree)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)*)\/?$/.exec(
172+
pathname
163173
)?.groups
164174
if (
165-
folderGroups?.type !== undefined &&
166-
folderGroups.namespace !== undefined &&
175+
folderGroups?.namespace !== undefined &&
167176
folderGroups.repo !== undefined &&
168177
folderGroups.action !== undefined &&
169178
folderGroups.branch !== undefined &&
170179
folderGroups.path !== undefined &&
171180
folderGroups.branch !== 'refs'
172181
) {
182+
const typePath = type === 'dataset' ? '/datasets' : type === 'space' ? '/spaces' : ''
173183
const branch = folderGroups.branch.replace(/\//g, '%2F')
174-
const source = `${urlObject.origin}${folderGroups.type}/${folderGroups.namespace}/${folderGroups.repo}/${folderGroups.action}/${branch}${folderGroups.path}`
184+
const source = `${urlObject.origin}${typePath}/${folderGroups.namespace}/${folderGroups.repo}/${folderGroups.action}/${branch}${folderGroups.path}`
175185
return {
176186
kind: 'directory',
177187
source,
178188
origin: urlObject.origin,
179-
type: parseRepoType(folderGroups.type.slice(1)),
189+
type,
180190
repo: folderGroups.namespace + '/' + folderGroups.repo,
181191
action: 'tree',
182192
branch,
@@ -185,30 +195,30 @@ export function parseHuggingFaceUrl(url: string): HFUrl {
185195
}
186196

187197
const fileGroups =
188-
/^(?<type>\/datasets|\/spaces)\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/(?<action>blob|resolve)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)+)$/.exec(
189-
urlObject.pathname
198+
/^\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/(?<action>blob|resolve)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)+)$/.exec(
199+
pathname
190200
)?.groups
191201
if (
192-
fileGroups?.type !== undefined &&
193-
fileGroups.namespace !== undefined &&
202+
fileGroups?.namespace !== undefined &&
194203
fileGroups.repo !== undefined &&
195204
fileGroups.action !== undefined &&
196205
fileGroups.branch !== undefined &&
197206
fileGroups.path !== undefined &&
198207
fileGroups.branch !== 'refs'
199208
) {
209+
const typePath = type === 'dataset' ? '/datasets' : type === 'space' ? '/spaces' : ''
200210
const branch = fileGroups.branch.replace(/\//g, '%2F')
201-
const source = `${urlObject.origin}${fileGroups.type}/${fileGroups.namespace}/${fileGroups.repo}/${fileGroups.action}/${branch}${fileGroups.path}`
211+
const source = `${urlObject.origin}${typePath}/${fileGroups.namespace}/${fileGroups.repo}/${fileGroups.action}/${branch}${fileGroups.path}`
202212
return {
203213
kind: 'file',
204214
source,
205215
origin: urlObject.origin,
206-
type: parseRepoType(fileGroups.type.slice(1)),
216+
type,
207217
repo: fileGroups.namespace + '/' + fileGroups.repo,
208218
action: fileGroups.action === 'blob' ? 'blob' : 'resolve',
209219
branch,
210220
path: fileGroups.path,
211-
resolveUrl: `${urlObject.origin}${fileGroups.type}/${fileGroups.namespace}/${fileGroups.repo}/resolve/${branch}${fileGroups.path}`,
221+
resolveUrl: `${urlObject.origin}${typePath}/${fileGroups.namespace}/${fileGroups.repo}/resolve/${branch}${fileGroups.path}`,
212222
}
213223
}
214224

test/lib/sources/huggingFaceSource.test.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,11 @@ describe('parseHuggingFaceUrl', () => {
3939
'/anything',
4040
'/tasks',
4141
'/models',
42-
'/namespace/model', // TODO(SL): support model
43-
'/settings/profile', // TODO(SL): add a block/allow list?
42+
'/spaces',
4443
'/datasets',
4544
'/datasets/',
4645
'/datasets/namespace',
4746
'/datasets/namespace/',
48-
'/spaces',
49-
'/spaces/namespace',
5047
'/datasets/namespace/repo/branch',
5148
'/datasets/namespace/repo/tree',
5249
'/datasets/namespace/repo/tree/',
@@ -67,7 +64,7 @@ describe('parseHuggingFaceUrl', () => {
6764
test.for([
6865
{ type: 'dataset', typePath: 'datasets/' },
6966
{ type: 'space', typePath: 'spaces/' },
70-
// { type: 'model', typePath: '' },
67+
{ type: 'model', typePath: '' },
7168
].flatMap(({ type, typePath }) => [
7269
// Root directory
7370
[
@@ -166,7 +163,7 @@ describe('parseHuggingFaceUrl', () => {
166163
'/folder.parquet',
167164
],
168165
]))(
169-
'parses a DirectoryUrl for dataset/space/model root or subdirectory: %s',
166+
'parses a DirectoryUrl for $type root or subdirectory: %s',
170167
([url, source, repo, type, branch, path]) => {
171168
expect(parseHuggingFaceUrl(url)).toEqual({
172169
kind: 'directory',
@@ -187,6 +184,7 @@ describe('parseHuggingFaceUrl', () => {
187184
[
188185
{ type: 'dataset', typePath: 'datasets/' },
189186
{ type: 'space', typePath: 'spaces/' },
187+
{ type: 'model', typePath: '' },
190188
].flatMap(d => [
191189
{ ...d, branch: 'branch', sanitizedBranch: 'branch' },
192190
{ ...d, branch: 'refs/convert/parquet', sanitizedBranch: 'refs%2Fconvert%2Fparquet' },
@@ -197,7 +195,7 @@ describe('parseHuggingFaceUrl', () => {
197195
]).flatMap(d => [
198196
{ ...d, url: `https://huggingface.co/${d.typePath}${repo}/${d.action}/${d.branch}${path}` },
199197
]))(
200-
'parses a FileUrl for dataset/space/model file URL: $url',
198+
'parses a FileUrl for $type file URL: $url',
201199
({ type, typePath, sanitizedBranch, action, url }) => {
202200
const source = `https://huggingface.co/${typePath}${repo}/${action}/${sanitizedBranch}${path}`
203201
const resolveUrl = `https://huggingface.co/${typePath}${repo}/resolve/${sanitizedBranch}${path}`

0 commit comments

Comments
 (0)