TIL: Template data processing script with pathlib, fire, joblib, loguru, and tqdm
Template script for processing data with packages I am using most often.
I have added it as a Raycast snippet @script
from pathlib import Path
import fire
from joblib import Parallel, delayed
from loguru import logger
from tqdm import tqdm
# Setup logger to work with tqdm
logger.remove()
logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
def main(
n_jobs: int = 1,
):
"""Process data."""
# Setup
data_dir = Path.home() / "data"
sample_paths = sorted(data_dir.iterdir())
logger.info(f"Found {len(sample_paths)} samples to process")
# Parallel processing with joblib
results = Parallel(n_jobs=n_jobs)(
delayed(process)(sample_path)
for sample_path in tqdm(sample_paths, desc="Processing samples")
)
results = [result for result in results if result is not None]
logger.success(f"Processed {len(results)} samples")
def process(
sample_path: Path,
):
"""Process a sample."""
logger.debug(f"Processing {sample_path}")
raise NotImplementedError
if __name__ == "__main__":
fire.Fire(main)
Enjoy Reading This Article?
Here are some more articles you might like to read next: