Source code for stangene

"""Stangene: gene identifier harmonization for single-cell transcriptomics."""

__version__ = "0.1.0"

from stangene.classify import classify_features
from stangene.harmonize import HarmonizationResult, harmonize
from stangene.io import load_features
from stangene.merge import MergeResult, merge_features
from stangene.references import (
    ReferenceNotFoundError,
    build_reference,
    load_reference,
)
from stangene.report import conflict_report, generate_markdown_report, summary, write_reports
from stangene._logging import get_logger

_logger = get_logger("run")


[docs] def run( path: str, species: str, output_dir: str = None, dataset_name: str = None, reference_dir: str = None, ) -> HarmonizationResult: """Run the full harmonization pipeline on a single dataset.""" _logger.info("Starting harmonization: path=%s, species=%s", path, species) ft = load_features(path, species=species, dataset_name=dataset_name) ft = classify_features(ft) ref = load_reference(species, reference_dir=reference_dir) result = harmonize(ft, ref) if output_dir: from stangene.io import write_results write_results(result, output_dir, input_path=path) write_reports(result, output_dir) _logger.info("Harmonization complete: %s", result.stats) return result