`wildboar.datasets._repository`#

Module Contents#

Classes#

`Repository`	A repository is a collection of bundles
`JSONRepository`	A repository is a collection of bundles
`RepositoryCollection`
`Bundle`	Base class for handling dataset bundles
`ArffBundle`	Bundle of .arff files
`NpyBundle`	Bundle of NumPy binary files

Attributes#

DEFAULT_TAG

wildboar.datasets._repository.DEFAULT_TAG = 'default'#

class wildboar.datasets._repository.Repository#

A repository is a collection of bundles

abstract property name#

Name of the repository

Returns:: the name of the repository
Return type:: str

abstract property version#

The repository version

Returns:: the version of the repository
Return type:: str

abstract property download_url#

The url template for downloading bundles

Returns:: the download url
Return type:: str

abstract property wildboar_requires#

The minimum required wildboar version

Returns:: the minimum required wildboar version
Return type:: str

abstract get_bundles()#

Get all bundles

Returns:: a dictionary mapping each key to its bundle
Return type:: dict

get_bundle(key)#

Get a bundle with the specified key

Parameters:: key (str) – Key of the bundle
Returns:: bundle – A bundle or None
Return type:: Bundle, optional

load_dataset(bundle, dataset, *, cache_dir, version=None, tag=None, create_cache_dir=True, progress=True, dtype=None, force=False)#

list_datasets(bundle, *, cache_dir, version=None, tag=None, create_cache_dir=True, progress=True, force=False)#

clear_cache(cache_dir, keep_last_version=True)#

refresh()#: Refresh the repository

class wildboar.datasets._repository.JSONRepository(url)#

Bases: Repository

A repository is a collection of bundles

property wildboar_requires#

The minimum required wildboar version

Returns:: the minimum required wildboar version
Return type:: str

property name#

Name of the repository

Returns:: the name of the repository
Return type:: str

property version#

The repository version

Returns:: the version of the repository
Return type:: str

property download_url#

The url template for downloading bundles

Returns:: the download url
Return type:: str

get_bundles()#

Get all bundles

Returns:: a dictionary mapping each key to its bundle
Return type:: dict

refresh()#: Refresh the repository

class wildboar.datasets._repository.RepositoryCollection#

__getitem__(item)#

__contains__(item)#

__iter__()#

append(item)#

class wildboar.datasets._repository.Bundle(*, key, version, name, description=None, class_index=-1)#

Base class for handling dataset bundles

name#

Human-readable name of the bundle

Type:: str

description#

Description of the bundle

Type:: str

class_index#

Index of the class label(s)

Type:: int or array-like

get_filename(version=None, tag=None, ext=None)#

list(archive)#

List all datasets in this bundle

Parameters:: archive (ZipFile) – The bundle file
Returns:: dataset_names – A sorted list of datasets in the bundle
Return type:: list

load(name, archive, *, dtype=None)#

Load a dataset from the bundle

Parameters:

name (str) – Name of the dataset
archive (ZipFile) – The zip-file bundle
dtype (object, optional) – Cast the data and label matrix to a specific type

Returns:

x (ndarray) – Data samples
y (ndarray) – Data labels
n_training_samples (int) – Number of samples that are for training. The value is <= x.shape[0]

class wildboar.datasets._repository.ArffBundle(*, key, version, name, description=None, class_index=-1, encoding='utf-8')#

Bases: Bundle

Bundle of .arff files

class wildboar.datasets._repository.NpyBundle(*, key, version, name, description=None, class_index=-1)#