Module robofish.io.io

Expand source code
import robofish.io
import datetime
from typing import Union, Iterable
from pathlib import Path
import logging

import numpy as np
import pandas
import random
import deprecation


def now_iso8061() -> str:
    """Return the current UTC time formatted as an ISO 8601 string.

    Returns:
        str: A timezone-aware UTC timestamp with microsecond precision,
            e.g. ``2021-01-31T12:00:00.000000+00:00``.
    """
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat(timespec="microseconds")


@deprecation.deprecated(
    deprecated_in="0.2.10",
    removed_in="0.3",
    details="Loading all files first and then handling them is slow, memory intensive and inconvenient. Don't use this method.",
)
def read_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    strict_validate: bool = False,
    max_files: int = None,
) -> dict:
    """Load hdf5 files from the given path(s).

    The function can be given the path to a single hdf5 file, to a folder
    containing hdf5 files, or any iterable of files and/or folders. Folders
    are searched recursively for the extensions hdf, hdf5, h5 and he5.

    Args:
        paths: The path to a hdf5 file or folder, or an iterable of such paths.
        strict_validate: Choice between error and warning in case of invalidity
        max_files: Maximum number of files to be read in total (over all given
            paths). ``None`` means no limit.
    Returns:
        dict: A dictionary mapping each file's ``Path`` to the opened
            robofish.io.File object. The files are left open.
    Raises:
        AssertionError: If one of the paths does not exist or if no file was
            read at all.
    """

    logging.info(f"Reading files from path {paths}")

    # A single str/Path is wrapped into a list; every other iterable (list,
    # tuple, set, generator, ndarray, Series, ...) is consumed element-wise.
    if isinstance(paths, (str, Path)) or not isinstance(paths, Iterable):
        paths = [paths]

    paths = [Path(p) for p in paths]

    sf_dict = {}
    for path in paths:
        # Kept as an assert so callers catching AssertionError keep working.
        assert path.exists(), f"Path does not exists {path.resolve()}."

        if path.is_dir():
            logging.info("found dir %s" % path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            logging.info("Reading files")
        else:
            logging.info("found file %s" % path)
            files = [path]

        for file in files:
            # Enforce the limit over all paths, not only within one folder.
            if max_files is not None and len(sf_dict) >= max_files:
                break
            sf_dict[file] = robofish.io.File(path=file, strict_validate=strict_validate)

    assert len(sf_dict) > 0, f"No files found in given Paths. {paths}"

    return sf_dict


def read_property_from_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    entity_property: property = None,
    *,
    strict_validate: bool = False,
    max_files: int = None,
    shuffle: bool = False,
    predicate: callable = None,
):
    """Load hdf5 files from the given path(s) and return a property of the entities.

    The function can be given the path to a single hdf5 file, to a folder
    containing hdf5 files, or any iterable of files and/or folders. Folders
    are searched recursively for the extensions hdf, hdf5, h5 and he5.
    Paths that do not exist are skipped with a warning.

    Args:
        paths: The path to a hdf5 file or folder, or an iterable of such paths.
        entity_property: A property of robofish.io.Entity, e.g.
            Entity.poses_rad. Required; passing None fails the assertion.
        strict_validate: Choice between error and warning in case of invalidity
        max_files: Maximum number of files to be read in total (over all given
            paths). ``None`` means no limit.
        shuffle: Shuffle the order of the files found inside each folder
            (otherwise they are processed in sorted order).
        predicate: Forwarded unchanged as the first argument to
            ``File.select_entity_property``. Presumably selects which
            entities are included - confirm against robofish.io.File.
    Returns:
        A list with one entry per file read, each being the result of
        ``File.select_entity_property(predicate, entity_property)``.
    Raises:
        AssertionError: If entity_property is None.
    """

    assert (
        entity_property is not None
    ), "Please select an entity property e.g. 'Entity.poses_rad'"
    logging.info(f"Reading files from path {paths}")

    # A single str/Path is wrapped into a list; every other iterable (list,
    # tuple, set, generator, ndarray, Series, ...) is consumed element-wise.
    if isinstance(paths, (str, Path)) or not isinstance(paths, Iterable):
        paths = [paths]

    paths = [Path(p) for p in paths]

    poses_array = []
    for path in paths:
        if path.is_dir():
            logging.info("found dir %s" % path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            if shuffle:
                # random.shuffle works in place and returns None, so its
                # result must not be assigned back to `files`.
                random.shuffle(files)
            logging.info("Reading files")
        elif path.exists():
            logging.info("found file %s" % path)
            files = [path]
        else:
            logging.warning("Skipping path %s because it does not exist", path)
            continue

        for file in files:
            # Enforce the limit over all paths, single files included.
            if max_files is not None and len(poses_array) >= max_files:
                break

            # rglob may also match folders whose name ends in a hdf5 suffix.
            if file.is_dir():
                continue

            with robofish.io.File(path=file, strict_validate=strict_validate) as f:
                poses_array.append(
                    f.select_entity_property(predicate, entity_property)
                )
    return poses_array

Functions

def now_iso8061() ‑> str

The current UTC time as an ISO 8601 string.

Returns

str
The current UTC time as an ISO 8601 string with microsecond precision.
Expand source code
def now_iso8061() -> str:
    """Return the current UTC time formatted as an ISO 8601 string.

    Returns:
        str: A timezone-aware UTC timestamp with microsecond precision,
            e.g. ``2021-01-31T12:00:00.000000+00:00``.
    """
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat(timespec="microseconds")
def read_multiple_files(paths: Union[pathlib.Path, str, Iterable[pathlib.Path], Iterable[str]], strict_validate: bool = False, max_files: int = None) ‑> dict

Load hdf5 files from a given path.

The function can be given the path to a single hdf5 file, to a folder containing hdf5 files, or an array of multiple files or folders.

Args

paths
The path to a hdf5 file or folder, or a collection of such paths.
strict_validate
Choice between error and warning in case of invalidity
max_files
Maximum number of files to be read

Returns

dict
A dictionary where the keys are the file paths and the values are the opened robofish.io.File objects

Deprecated since version: 0.2.10

This will be removed in 0.3. Loading all files first and then handling them is slow, memory intensive and inconvenient. Don't use this method.

Expand source code
@deprecation.deprecated(
    deprecated_in="0.2.10",
    removed_in="0.3",
    details="Loading all files first and then handling them is slow, memory intensive and inconvenient. Don't use this method.",
)
def read_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    strict_validate: bool = False,
    max_files: int = None,
) -> dict:
    """Load hdf5 files from the given path(s).

    The function can be given the path to a single hdf5 file, to a folder
    containing hdf5 files, or any iterable of files and/or folders. Folders
    are searched recursively for the extensions hdf, hdf5, h5 and he5.

    Args:
        paths: The path to a hdf5 file or folder, or an iterable of such paths.
        strict_validate: Choice between error and warning in case of invalidity
        max_files: Maximum number of files to be read in total (over all given
            paths). ``None`` means no limit.
    Returns:
        dict: A dictionary mapping each file's ``Path`` to the opened
            robofish.io.File object. The files are left open.
    Raises:
        AssertionError: If one of the paths does not exist or if no file was
            read at all.
    """

    logging.info(f"Reading files from path {paths}")

    # A single str/Path is wrapped into a list; every other iterable (list,
    # tuple, set, generator, ndarray, Series, ...) is consumed element-wise.
    if isinstance(paths, (str, Path)) or not isinstance(paths, Iterable):
        paths = [paths]

    paths = [Path(p) for p in paths]

    sf_dict = {}
    for path in paths:
        # Kept as an assert so callers catching AssertionError keep working.
        assert path.exists(), f"Path does not exists {path.resolve()}."

        if path.is_dir():
            logging.info("found dir %s" % path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            logging.info("Reading files")
        else:
            logging.info("found file %s" % path)
            files = [path]

        for file in files:
            # Enforce the limit over all paths, not only within one folder.
            if max_files is not None and len(sf_dict) >= max_files:
                break
            sf_dict[file] = robofish.io.File(path=file, strict_validate=strict_validate)

    assert len(sf_dict) > 0, f"No files found in given Paths. {paths}"

    return sf_dict
def read_property_from_multiple_files(paths: Union[pathlib.Path, str, Iterable[pathlib.Path], Iterable[str]], entity_property: property = None, *, strict_validate: bool = False, max_files: int = None, shuffle: bool = False, predicate: callable = None)

Load hdf5 files from a given path and return the property of the entities.

The function can be given the path to a single hdf5 file, to a folder containing hdf5 files, or an array of multiple files or folders.

Args

paths
The path to a hdf5 file or folder, or a collection of such paths.
entity_property
A property of robofish.io.Entity, e.g. Entity.poses_rad. Required: although the signature defaults to None, the function asserts that a property is given.
strict_validate
Choice between error and warning in case of invalidity
max_files
Maximum number of files to be read
shuffle
Shuffle the order of files

predicate
Forwarded unchanged as the first argument to File.select_entity_property.

Returns

An array of all entity properties arrays

Expand source code
def read_property_from_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    entity_property: property = None,
    *,
    strict_validate: bool = False,
    max_files: int = None,
    shuffle: bool = False,
    predicate: callable = None,
):
    """Load hdf5 files from the given path(s) and return a property of the entities.

    The function can be given the path to a single hdf5 file, to a folder
    containing hdf5 files, or any iterable of files and/or folders. Folders
    are searched recursively for the extensions hdf, hdf5, h5 and he5.
    Paths that do not exist are skipped with a warning.

    Args:
        paths: The path to a hdf5 file or folder, or an iterable of such paths.
        entity_property: A property of robofish.io.Entity, e.g.
            Entity.poses_rad. Required; passing None fails the assertion.
        strict_validate: Choice between error and warning in case of invalidity
        max_files: Maximum number of files to be read in total (over all given
            paths). ``None`` means no limit.
        shuffle: Shuffle the order of the files found inside each folder
            (otherwise they are processed in sorted order).
        predicate: Forwarded unchanged as the first argument to
            ``File.select_entity_property``. Presumably selects which
            entities are included - confirm against robofish.io.File.
    Returns:
        A list with one entry per file read, each being the result of
        ``File.select_entity_property(predicate, entity_property)``.
    Raises:
        AssertionError: If entity_property is None.
    """

    assert (
        entity_property is not None
    ), "Please select an entity property e.g. 'Entity.poses_rad'"
    logging.info(f"Reading files from path {paths}")

    # A single str/Path is wrapped into a list; every other iterable (list,
    # tuple, set, generator, ndarray, Series, ...) is consumed element-wise.
    if isinstance(paths, (str, Path)) or not isinstance(paths, Iterable):
        paths = [paths]

    paths = [Path(p) for p in paths]

    poses_array = []
    for path in paths:
        if path.is_dir():
            logging.info("found dir %s" % path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            if shuffle:
                # random.shuffle works in place and returns None, so its
                # result must not be assigned back to `files`.
                random.shuffle(files)
            logging.info("Reading files")
        elif path.exists():
            logging.info("found file %s" % path)
            files = [path]
        else:
            logging.warning("Skipping path %s because it does not exist", path)
            continue

        for file in files:
            # Enforce the limit over all paths, single files included.
            if max_files is not None and len(poses_array) >= max_files:
                break

            # rglob may also match folders whose name ends in a hdf5 suffix.
            if file.is_dir():
                continue

            with robofish.io.File(path=file, strict_validate=strict_validate) as f:
                poses_array.append(
                    f.select_entity_property(predicate, entity_property)
                )
    return poses_array