"""Pre-flight stage: folder initialization and optional fresh-start cleanup."""
from __future__ import annotations
import logging
import os
import shutil
from pathlib import Path
from capellini.config import CapelliniConfig
logger = logging.getLogger(__name__)
# Bundled reference paths (never deleted by fresh_start)
_BUNDLED_16S = Path(__file__).parent.parent / "data" / "references" / "progenome16S.fasta"
_BUNDLED_SPACERS = (
Path(__file__).parent.parent / "data" / "references" / "spacers" / "spacers_CompleteCollection.fasta"
)
[docs]
def run_preflight(cfg: CapelliniConfig) -> None:
"""Create all output folders; if fresh_start=True delete previous intermediates.
Protected files (never deleted):
- virus FASTA in input_fasta_folder
- bundled progenome16S.fasta (if present)
- metadata file
Args:
cfg: Populated CapelliniConfig instance.
"""
logger.info("Pre-flight: initializing folder structure")
folders_to_manage = [
cfg.dada2_folder,
cfg.mmseq_folder,
cfg.sp_folder,
cfg.procs_folder,
]
if cfg.fresh_start:
logger.info("Fresh start: removing previous intermediates (protected files preserved)")
for folder in folders_to_manage:
if folder:
shutil.rmtree(folder, ignore_errors=True)
os.makedirs(folder, exist_ok=True)
# Clean input fasta folder but protect critical files
protected_names: set[str] = set()
if cfg.virus_fasta_name:
protected_names.add(cfg.virus_fasta_name)
if cfg.isolate_ref_16S:
protected_names.add("progenome16S.fasta")
if cfg.metadata_path:
protected_names.add(Path(cfg.metadata_path).name)
input_folder = Path(cfg.input_fasta_folder)
if input_folder.exists():
for fp in input_folder.iterdir():
if fp.is_file() and fp.name not in protected_names:
fp.unlink()
logger.debug("Removed: %s", fp)
else:
for folder in folders_to_manage:
if folder:
os.makedirs(folder, exist_ok=True)
# Always ensure SpacePHARER subdirectories exist
if cfg.sp_folder:
for sub in ("spacers", "databases", "output", "tmp"):
Path(cfg.sp_folder, sub).mkdir(parents=True, exist_ok=True)
# Ensure protein / clustering paths exist
if cfg.proteins_extraction_path:
os.makedirs(cfg.proteins_extraction_path, exist_ok=True)
if cfg.clustering_path:
os.makedirs(cfg.clustering_path, exist_ok=True)
if cfg.enhanced_networks_folder:
os.makedirs(cfg.enhanced_networks_folder, exist_ok=True)
if cfg.input_fasta_folder:
os.makedirs(cfg.input_fasta_folder, exist_ok=True)
logger.info("Pre-flight complete")