wildboar.datasets._repository#

Module Contents#

Classes#

Bundle

Base class for handling dataset bundles

JSONRepository

A repository is a collection of bundles

NpBundle

Bundle of NumPy binary files

Repository

A repository is a collection of bundles

RepositoryCollection

Attributes#

class wildboar.datasets._repository.Bundle(*, key, version, name, tag=None, arrays=None, description=None, collections=None)[source]#

Base class for handling dataset bundles

name[source]#

Human-readable name of the bundle

Type:

str

description[source]#

Description of the bundle

Type:

str

label_index[source]#

Index of the class label(s)

Type:

int or array-like

Parameters:
  • key (str) – A unique key of the bundle

  • version (str) – The version of the bundle

  • name (str) – Human-readable name of the bundle

  • description (str) – Description of the bundle

  • arrays (list) – The arrays of the dataset

get_collection(collection)[source]#
get_filename(version=None, tag=None, ext=None)[source]#
list(archive, collection=None)[source]#

List all datasets in this bundle

Parameters:
  • archive (ZipFile) – The bundle file

  • collection (str, optional) – The collection name

Returns:

dataset_names – A sorted list of datasets in the bundle

Return type:

list

load(name, archive)[source]#

Load a dataset from the bundle

Parameters:
  • name (str) – Name of the dataset

  • archive (ZipFile) – The zip-file bundle

Returns:

  • x (ndarray) – Data samples

  • y (ndarray) – Data labels

  • n_training_samples (int) – Number of samples that are for training. The value is <= x.shape[0]

  • extras (dict, optional) – Extra numpy arrays

class wildboar.datasets._repository.JSONRepository(url)[source]#

Bases: Repository

A repository is a collection of bundles

property download_url[source]#

The url template for downloading bundles

Returns:

The download URL

Return type:

str

property identifier[source]#
property name[source]#

Name of the repository

Returns:

The name of the repository

Return type:

str

property version[source]#

The repository version

Returns:

The version of the repository

Return type:

str

property wildboar_requires[source]#

The minimum required wildboar version

Returns:

The minimum required wildboar version

Return type:

str

supported_version = '1.1'[source]#
get_bundles()[source]#

Get all bundles

Returns:

A dictionary of bundle keys and bundles

Return type:

dict

class wildboar.datasets._repository.NpBundle(*, key, version, name, tag=None, arrays=None, description=None, collections=None)[source]#

Bases: Bundle

Bundle of NumPy binary files

Parameters:
  • key (str) – A unique key of the bundle

  • version (str) – The version of the bundle

  • name (str) – Human-readable name of the bundle

  • description (str) – Description of the bundle

  • arrays (list) – The arrays of the dataset

class wildboar.datasets._repository.Repository[source]#

A repository is a collection of bundles

property active[source]#
abstract property download_url[source]#

The url template for downloading bundles

Returns:

The download URL

Return type:

str

abstract property identifier[source]#
abstract property name[source]#

Name of the repository

Returns:

The name of the repository

Return type:

str

abstract property version[source]#

The repository version

Returns:

The version of the repository

Return type:

str

abstract property wildboar_requires[source]#

The minimum required wildboar version

Returns:

The minimum required wildboar version

Return type:

str

__eq__(o)[source]#

Return self==value.

__hash__() → int[source]#

Return hash(self).

clear_cache(cache_dir, keep_last_version=True)[source]#
get_bundle(key)[source]#

Get a bundle with the specified key

Parameters:

key (str) – Key of the bundle

Returns:

bundle – A bundle or None

Return type:

Bundle, optional

abstract get_bundles()[source]#

Get all bundles

Returns:

A dictionary of bundle keys and bundles

Return type:

dict

list_datasets(bundle, *, cache_dir, collection=None, version=None, tag=None, create_cache_dir=True, progress=True, force=False)[source]#
load_dataset(bundle, dataset, *, cache_dir, version=None, tag=None, create_cache_dir=True, progress=True, force=False)[source]#
refresh(timeout=None)[source]#

Refresh the repository

class wildboar.datasets._repository.RepositoryCollection[source]#
__contains__(item)[source]#
__delitem__(key)[source]#
__getitem__(key)[source]#
__iter__()[source]#
__len__()[source]#
install(repository, refresh=True, timeout=None, cache_dir=None)[source]#
load_repository(repository, cache_dir=None)[source]#
refresh(repository=None, timeout=None, cache_dir=None)[source]#
save_repository(repository, cache_dir=None)[source]#
wildboar.datasets._repository.DEFAULT_TAG = 'default'[source]#