"""Pre-flight stage: folder initialization and optional fresh-start cleanup."""
from __future__ import annotations
import logging
import os
import shutil
from pathlib import Path
from capellini.config import CapelliniConfig
logger = logging.getLogger(__name__)
[docs]
def run_preflight(cfg: CapelliniConfig) -> None:
    """Create all output folders; if ``fresh_start`` is True wipe previous intermediates.

    Managed intermediate folders (``dada2_folder``, ``mmseq_folder``,
    ``sp_folder``, ``procs_folder``) are always created. On a fresh start each
    one is removed (best effort) and recreated empty, and every regular file
    directly inside ``input_fasta_folder`` is deleted unless it is protected.
    Subdirectories of ``input_fasta_folder`` are left untouched.

    Protected files inside ``input_fasta_folder`` (never deleted):
      - virus FASTA
      - metadata file

    Note: ``progenome16S.fasta`` is *not* protected — it lives bundled inside
    the package (``capellini/data/references/``) and the MMSeqs2 stage
    re-derives the working copy in ``input_fasta_folder`` automatically when
    needed.

    Any folder setting that is unset (``None`` or an empty string) is skipped.
    This includes ``input_fasta_folder`` during fresh-start cleanup: an empty
    value would otherwise resolve to the current working directory.

    Args:
        cfg: Populated CapelliniConfig instance.
    """
    logger.info("Pre-flight: initializing folder structure")

    managed_folders = [
        folder
        for folder in (
            cfg.dada2_folder,
            cfg.mmseq_folder,
            cfg.sp_folder,
            cfg.procs_folder,
        )
        if folder
    ]

    wipe = bool(cfg.fresh_start)
    if wipe:
        logger.info("Fresh start: removing previous intermediates (protected files preserved)")

    for folder in managed_folders:
        if wipe:
            # Best effort: a missing or partially removable tree is not an error.
            shutil.rmtree(folder, ignore_errors=True)
        os.makedirs(folder, exist_ok=True)

    if wipe:
        _remove_unprotected_inputs(cfg)

    # Always ensure SpacePHARER subdirectories exist
    if cfg.sp_folder:
        for sub in ("spacers", "databases", "output", "tmp"):
            Path(cfg.sp_folder, sub).mkdir(parents=True, exist_ok=True)

    # Ensure protein / clustering / network / input paths exist
    for extra_path in (
        cfg.proteins_extraction_path,
        cfg.clustering_path,
        cfg.enhanced_networks_folder,
        cfg.input_fasta_folder,
    ):
        if extra_path:
            os.makedirs(extra_path, exist_ok=True)

    logger.info("Pre-flight complete")


def _remove_unprotected_inputs(cfg: CapelliniConfig) -> None:
    """Delete regular files in ``input_fasta_folder`` except the protected inputs."""
    # Path("") is the current working directory and Path(None) raises, so an
    # unset input folder must never reach the deletion loop below.
    if not cfg.input_fasta_folder:
        return

    input_folder = Path(cfg.input_fasta_folder)
    if not input_folder.exists():
        return

    # Protection is by bare file name within the input folder.
    protected_names: set[str] = set()
    if cfg.virus_fasta_name:
        protected_names.add(cfg.virus_fasta_name)
    if cfg.metadata_path:
        protected_names.add(Path(cfg.metadata_path).name)

    for fp in input_folder.iterdir():
        if fp.is_file() and fp.name not in protected_names:
            fp.unlink()
            logger.debug("Removed: %s", fp)