# Source code for capellini.stages.preflight

"""Pre-flight stage: folder initialization and optional fresh-start cleanup."""

from __future__ import annotations

import logging
import os
import shutil
from pathlib import Path

from capellini.config import CapelliniConfig

logger = logging.getLogger(__name__)


def _purge_unprotected_files(folder: Path, protected_names: set[str]) -> None:
    """Delete each regular file directly inside *folder* whose name is not protected."""
    for entry in folder.iterdir():
        if entry.is_file() and entry.name not in protected_names:
            entry.unlink()
            logger.debug("Removed: %s", entry)


def run_preflight(cfg: CapelliniConfig) -> None:
    """Create all output folders; if ``fresh_start`` is True wipe previous intermediates.

    Protected files inside ``input_fasta_folder`` (never deleted by the
    input-folder cleanup):
    - virus FASTA
    - metadata file

    Protection is by file name: any regular file directly inside
    ``input_fasta_folder`` whose name equals the basename of
    ``cfg.virus_fasta_name`` or ``cfg.metadata_path`` is kept. Sub-directories
    of ``input_fasta_folder`` are left untouched.

    Note: ``progenome16S.fasta`` is *not* protected — it lives bundled inside
    the package (``capellini/data/references/``) and the MMSeqs2 stage
    re-derives the working copy in ``input_fasta_folder`` automatically when
    needed.

    Any folder setting that is empty or ``None`` is skipped. In particular the
    input-folder cleanup is skipped when ``input_fasta_folder`` is not set.

    Args:
        cfg: Populated CapelliniConfig instance.
    """
    logger.info("Pre-flight: initializing folder structure")

    managed_folders = [
        folder
        for folder in (
            cfg.dada2_folder,
            cfg.mmseq_folder,
            cfg.sp_folder,
            cfg.procs_folder,
        )
        if folder
    ]

    if cfg.fresh_start:
        logger.info("Fresh start: removing previous intermediates (protected files preserved)")
        # Remove everything first and recreate afterwards, so that a managed
        # folder nested inside another managed folder still exists at the end.
        for folder in managed_folders:
            # Best-effort removal: a missing or partially locked tree must not abort.
            shutil.rmtree(folder, ignore_errors=True)

    for folder in managed_folders:
        os.makedirs(folder, exist_ok=True)

    # Clean input fasta folder but protect user-supplied inputs.
    # The truthiness guard matters: Path("") is the current working directory,
    # so an unset input folder must never reach the purge below.
    if cfg.fresh_start and cfg.input_fasta_folder:
        protected_names: set[str] = set()
        if cfg.virus_fasta_name:
            # Compare basenames so a value containing a directory still matches.
            protected_names.add(Path(cfg.virus_fasta_name).name)
        if cfg.metadata_path:
            protected_names.add(Path(cfg.metadata_path).name)

        input_folder = Path(cfg.input_fasta_folder)
        if input_folder.is_dir():
            _purge_unprotected_files(input_folder, protected_names)

    # Always ensure SpacePHARER subdirectories exist
    if cfg.sp_folder:
        for sub in ("spacers", "databases", "output", "tmp"):
            Path(cfg.sp_folder, sub).mkdir(parents=True, exist_ok=True)

    # Ensure protein / clustering / network / input paths exist
    for path in (
        cfg.proteins_extraction_path,
        cfg.clustering_path,
        cfg.enhanced_networks_folder,
        cfg.input_fasta_folder,
    ):
        if path:
            os.makedirs(path, exist_ok=True)

    logger.info("Pre-flight complete")