Module robofish.io.io
Expand source code
import robofish.io
import datetime
from typing import Union, Iterable
from pathlib import Path
import logging
import numpy as np
import pandas
import random
import deprecation
def now_iso8061() -> str:
    """Return the current UTC time formatted as an ISO 8601 string.

    Returns:
        str: Timezone-aware UTC timestamp with microsecond precision.
    """
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat(timespec="microseconds")
@deprecation.deprecated(
    deprecated_in="0.2.10",
    removed_in="0.3",
    details="Loading all files first and then handling them is slow, memory intensive and inconvenient. Don't use this method.",
)
def read_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    strict_validate: bool = False,
    max_files: int = None,
) -> dict:
    """Open the hdf5 files found at the given path(s).

    ``paths`` may be a single hdf5 file, a folder that is searched
    recursively for hdf5 files, or a list / numpy array / pandas Series
    mixing both.

    Args:
        paths: A path to a hdf5 file or folder, or a collection of such paths.
        strict_validate: Forwarded to ``robofish.io.File``; choice between
            error and warning in case of invalidity.
        max_files: Maximum number of files to be opened in total, across all
            given paths. ``None`` means no limit.

    Returns:
        dict: Maps each file path to its opened ``robofish.io.File`` object.
        The files are left open; closing them is up to the caller.

    Raises:
        AssertionError: If one of the paths does not exist or no file was
            opened at all.
    """
    logging.info(f"Reading files from path {paths}")

    list_types = (list, np.ndarray, pandas.core.series.Series)
    if not isinstance(paths, list_types):
        paths = [paths]
    paths = [Path(p) for p in paths]

    sf_dict = {}
    for path in paths:
        # Every given path is validated, even once the file limit is reached.
        assert path.exists(), f"Path does not exists {path.resolve()}."

        if path.is_dir():
            logging.info("found dir %s", path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            logging.info("Reading files")
        else:
            logging.info("found file %s", path)
            files = [path]

        for file in files:
            # The limit counts files over all paths, not per directory.
            if max_files is not None and len(sf_dict) >= max_files:
                break
            sf_dict[file] = robofish.io.File(path=file, strict_validate=strict_validate)

    assert len(sf_dict) > 0, f"No files found in given Paths. {paths}"
    return sf_dict
def read_property_from_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    entity_property: property = None,
    *,
    strict_validate: bool = False,
    max_files: int = None,
    shuffle: bool = False,
    predicate: callable = None,
):
    """Read one entity property from every hdf5 file at the given path(s).

    ``paths`` may be a single hdf5 file, a folder that is searched
    recursively for hdf5 files, or a list / numpy array / pandas Series
    mixing both. Each file is opened, queried and closed again before the
    next one is read.

    Args:
        paths: A path to a hdf5 file or folder, or a collection of such paths.
        entity_property: The entity property to read, e.g.
            ``Entity.poses_rad``. Required, despite the ``None`` default.
        strict_validate: Forwarded to ``robofish.io.File``; choice between
            error and warning in case of invalidity.
        max_files: Maximum number of files to be read in total, across all
            given paths. ``None`` means no limit.
        shuffle: Read the files of each folder in random instead of sorted
            order.
        predicate: Forwarded unchanged to ``File.select_entity_property``.
            Presumably selects which entities are included -- confirm
            against ``robofish.io.File``.

    Returns:
        list: One entry per file read, holding the result of
        ``File.select_entity_property`` for that file.

    Raises:
        AssertionError: If ``entity_property`` is ``None``.
    """
    assert (
        entity_property is not None
    ), "Please select an entity property e.g. 'Entity.poses_rad'"

    logging.info(f"Reading files from path {paths}")

    list_types = (list, np.ndarray, pandas.core.series.Series)
    if not isinstance(paths, list_types):
        paths = [paths]
    paths = [Path(p) for p in paths]

    poses_array = []
    for path in paths:
        if path.is_dir():
            logging.info("found dir %s", path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            if shuffle:
                # random.shuffle works in place and returns None, so its
                # result must not be assigned back to ``files``.
                random.shuffle(files)
            logging.info("Reading files")
        elif path.exists():
            logging.info("found file %s", path)
            files = [path]
        else:
            # Missing paths are skipped (best effort), but no longer silently.
            logging.warning("Skipping path that does not exist: %s", path)
            continue

        for file in files:
            # The limit counts files over all paths, including single files.
            if max_files is not None and len(poses_array) >= max_files:
                break
            # A folder whose name ends in an hdf5 extension also matches the glob.
            if file.is_dir():
                continue
            with robofish.io.File(path=file, strict_validate=strict_validate) as f:
                poses_array.append(f.select_entity_property(predicate, entity_property))

    return poses_array
Functions
def now_iso8061() ‑> str
-
The current UTC time as an ISO 8601 string.
Returns
str
- The current UTC time as an ISO 8601 string with microsecond precision.
Expand source code
def now_iso8061() -> str:
    """Return the current UTC time formatted as an ISO 8601 string.

    Returns:
        str: Timezone-aware UTC timestamp with microsecond precision.
    """
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat(timespec="microseconds")
def read_multiple_files(paths: Union[pathlib.Path, str, Iterable[pathlib.Path], Iterable[str]], strict_validate: bool = False, max_files: int = None) ‑> dict
-
Load hdf5 files from a given path.
The function can be given the path to a single hdf5 file, to a folder containing hdf5 files, or an array of multiple files or folders.
Args
paths
- The path to a hdf5 file or folder.
strict_validate
- Choice between error and warning in case of invalidity
max_files
- Maximum number of files to be read
Returns
dict
- A dictionary where the keys are the file paths and the values are the opened robofish.io.File objects
Deprecated since version: 0.2.10
This will be removed in 0.3. Loading all files first and then handling them is slow, memory intensive and inconvenient. Don't use this method.
Expand source code
@deprecation.deprecated(
    deprecated_in="0.2.10",
    removed_in="0.3",
    details="Loading all files first and then handling them is slow, memory intensive and inconvenient. Don't use this method.",
)
def read_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    strict_validate: bool = False,
    max_files: int = None,
) -> dict:
    """Open the hdf5 files found at the given path(s).

    ``paths`` may be a single hdf5 file, a folder that is searched
    recursively for hdf5 files, or a list / numpy array / pandas Series
    mixing both.

    Args:
        paths: A path to a hdf5 file or folder, or a collection of such paths.
        strict_validate: Forwarded to ``robofish.io.File``; choice between
            error and warning in case of invalidity.
        max_files: Maximum number of files to be opened in total, across all
            given paths. ``None`` means no limit.

    Returns:
        dict: Maps each file path to its opened ``robofish.io.File`` object.
        The files are left open; closing them is up to the caller.

    Raises:
        AssertionError: If one of the paths does not exist or no file was
            opened at all.
    """
    logging.info(f"Reading files from path {paths}")

    list_types = (list, np.ndarray, pandas.core.series.Series)
    if not isinstance(paths, list_types):
        paths = [paths]
    paths = [Path(p) for p in paths]

    sf_dict = {}
    for path in paths:
        # Every given path is validated, even once the file limit is reached.
        assert path.exists(), f"Path does not exists {path.resolve()}."

        if path.is_dir():
            logging.info("found dir %s", path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            logging.info("Reading files")
        else:
            logging.info("found file %s", path)
            files = [path]

        for file in files:
            # The limit counts files over all paths, not per directory.
            if max_files is not None and len(sf_dict) >= max_files:
                break
            sf_dict[file] = robofish.io.File(path=file, strict_validate=strict_validate)

    assert len(sf_dict) > 0, f"No files found in given Paths. {paths}"
    return sf_dict
def read_property_from_multiple_files(paths: Union[pathlib.Path, str, Iterable[pathlib.Path], Iterable[str]], entity_property: property = None, *, strict_validate: bool = False, max_files: int = None, shuffle: bool = False, predicate: callable = None)
-
Load hdf5 files from a given path and return the property of the entities.
The function can be given the path to a single hdf5 file, to a folder containing hdf5 files, or an array of multiple files or folders.
Args
paths
- The path to a hdf5 file or folder.
entity_property
- A property of robofish.io.Entity, e.g. Entity.poses_rad. Required: the function asserts that it is not None.
strict_validate
- Choice between error and warning in case of invalidity
max_files
- Maximum number of files to be read
shuffle
- Shuffle the order of files
predicate
- Passed together with entity_property to File.select_entity_property for every file that is read.
Returns
An array of all entity properties arrays
Expand source code
def read_property_from_multiple_files(
    paths: Union[Path, str, Iterable[Path], Iterable[str]],
    entity_property: property = None,
    *,
    strict_validate: bool = False,
    max_files: int = None,
    shuffle: bool = False,
    predicate: callable = None,
):
    """Read one entity property from every hdf5 file at the given path(s).

    ``paths`` may be a single hdf5 file, a folder that is searched
    recursively for hdf5 files, or a list / numpy array / pandas Series
    mixing both. Each file is opened, queried and closed again before the
    next one is read.

    Args:
        paths: A path to a hdf5 file or folder, or a collection of such paths.
        entity_property: The entity property to read, e.g.
            ``Entity.poses_rad``. Required, despite the ``None`` default.
        strict_validate: Forwarded to ``robofish.io.File``; choice between
            error and warning in case of invalidity.
        max_files: Maximum number of files to be read in total, across all
            given paths. ``None`` means no limit.
        shuffle: Read the files of each folder in random instead of sorted
            order.
        predicate: Forwarded unchanged to ``File.select_entity_property``.
            Presumably selects which entities are included -- confirm
            against ``robofish.io.File``.

    Returns:
        list: One entry per file read, holding the result of
        ``File.select_entity_property`` for that file.

    Raises:
        AssertionError: If ``entity_property`` is ``None``.
    """
    assert (
        entity_property is not None
    ), "Please select an entity property e.g. 'Entity.poses_rad'"

    logging.info(f"Reading files from path {paths}")

    list_types = (list, np.ndarray, pandas.core.series.Series)
    if not isinstance(paths, list_types):
        paths = [paths]
    paths = [Path(p) for p in paths]

    poses_array = []
    for path in paths:
        if path.is_dir():
            logging.info("found dir %s", path)
            # Find all hdf5 files in folder
            files = sorted(
                file
                for ext in ("hdf", "hdf5", "h5", "he5")
                for file in path.rglob(f"*.{ext}")
            )
            if shuffle:
                # random.shuffle works in place and returns None, so its
                # result must not be assigned back to ``files``.
                random.shuffle(files)
            logging.info("Reading files")
        elif path.exists():
            logging.info("found file %s", path)
            files = [path]
        else:
            # Missing paths are skipped (best effort), but no longer silently.
            logging.warning("Skipping path that does not exist: %s", path)
            continue

        for file in files:
            # The limit counts files over all paths, including single files.
            if max_files is not None and len(poses_array) >= max_files:
                break
            # A folder whose name ends in an hdf5 extension also matches the glob.
            if file.is_dir():
                continue
            with robofish.io.File(path=file, strict_validate=strict_validate) as f:
                poses_array.append(f.select_entity_property(predicate, entity_property))

    return poses_array