wildboar.datasets._repository#

Module Contents#

Classes#

Repository

A repository is a collection of bundles

JSONRepository

A repository is a collection of bundles

RepositoryCollection

Bundle

Base class for handling dataset bundles

ArffBundle

Bundle of .arff files

NpyBundle

Bundle of NumPy binary files

Attributes#

wildboar.datasets._repository.DEFAULT_TAG = 'default'#
class wildboar.datasets._repository.Repository#

A repository is a collection of bundles

abstract property name#

Name of the repository

Returns:

The name of the repository

Return type:

str

abstract property version#

The repository version

Returns:

The version of the repository

Return type:

str

abstract property download_url#

The URL template for downloading bundles

Returns:

The download URL

Return type:

str

abstract property wildboar_requires#

The minimum required wildboar version

Returns:

The minimum required wildboar version

Return type:

str

abstract get_bundles()#

Get all bundles

Returns:

A dictionary mapping each key to its bundle

Return type:

dict

get_bundle(key)#

Get a bundle with the specified key

Parameters:

key (str) – Key of the bundle

Returns:

bundle – A bundle or None

Return type:

Bundle, optional

load_dataset(bundle, dataset, *, cache_dir, version=None, tag=None, create_cache_dir=True, progress=True, dtype=None, force=False)#
list_datasets(bundle, *, cache_dir, version=None, tag=None, create_cache_dir=True, progress=True, force=False)#
clear_cache(cache_dir, keep_last_version=True)#
refresh()#

Refresh the repository

class wildboar.datasets._repository.JSONRepository(url)#

Bases: Repository

A repository is a collection of bundles

property wildboar_requires#

The minimum required wildboar version

Returns:

The minimum required wildboar version

Return type:

str

property name#

Name of the repository

Returns:

The name of the repository

Return type:

str

property version#

The repository version

Returns:

The version of the repository

Return type:

str

property download_url#

The URL template for downloading bundles

Returns:

The download URL

Return type:

str

get_bundles()#

Get all bundles

Returns:

A dictionary mapping each key to its bundle

Return type:

dict

refresh()#

Refresh the repository

class wildboar.datasets._repository.RepositoryCollection#
__getitem__(item)#
__contains__(item)#
__iter__()#
append(item)#
class wildboar.datasets._repository.Bundle(*, key, version, name, description=None, class_index=-1)#

Base class for handling dataset bundles

name#

Human-readable name of the bundle

Type:

str

description#

Description of the bundle

Type:

str

class_index#

Index of the class label(s)

Type:

int or array-like

get_filename(version=None, tag=None, ext=None)#
list(archive)#

List all datasets in this bundle

Parameters:

archive (ZipFile) – The bundle file

Returns:

dataset_names – A sorted list of datasets in the bundle

Return type:

list

load(name, archive, *, dtype=None)#

Load a dataset from the bundle

Parameters:
  • name (str) – Name of the dataset

  • archive (ZipFile) – The zip-file bundle

  • dtype (object, optional) – Cast the data and label matrix to a specific type

Returns:

  • x (ndarray) – Data samples

  • y (ndarray) – Data labels

  • n_training_samples (int) – Number of samples that are for training. The value is <= x.shape[0]

class wildboar.datasets._repository.ArffBundle(*, key, version, name, description=None, class_index=-1, encoding='utf-8')#

Bases: Bundle

Bundle of .arff files

class wildboar.datasets._repository.NpyBundle(*, key, version, name, description=None, class_index=-1)#

Bases: Bundle

Bundle of NumPy binary files