---
title: "Quick Start Guide for scTypeEval"
author:
  - name: Josep Garnica
    affiliation: University of Geneva
    email: josep.garnicacaparros@unige.ch
date: "`r Sys.Date()`"
output:
  BiocStyle::html_document:
    toc: true
    toc_float: true
vignette: >
  %\VignetteIndexEntry{Quick Start Guide}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = TRUE
)
```

# Overview

This quick start guide demonstrates the essential steps for evaluating cell type annotations using `scTypeEval`. For a comprehensive tutorial, see the main vignette.

```{r load}
library(scTypeEval)
```

# Minimal Workflow

## From a Count Matrix

```{r minimal_workflow}
library(Matrix)

# Generate example data: 500 genes x 100 cells of Poisson counts
set.seed(123)
counts <- Matrix(rpois(50000, 5), nrow = 500, ncol = 100, sparse = TRUE)
rownames(counts) <- paste0("Gene", seq_len(500))
colnames(counts) <- paste0("Cell", seq_len(100))

# Four cell types of 25 cells each; sample labels cycle S1..S5, so every
# cell type has exactly 5 cells in each of the 5 samples
metadata <- data.frame(
  celltype = rep(c("TypeA", "TypeB", "TypeC", "TypeD"), each = 25),
  sample = rep(paste0("S", seq_len(5)), times = 20),
  row.names = colnames(counts)
)

# Create object
sceval <- create_scTypeEval(matrix = counts, metadata = metadata)

# Process data
sceval <- run_processing_data(
  sceval,
  ident = "celltype",
  sample = "sample",
  min_samples = 3,
  min_cells = 5
)

# Identify features
sceval <- run_hvg(sceval, var_method = "basic", ngenes = 1000)

# Run PCA
sceval <- run_pca(sceval, ndim = 20)

# Compute dissimilarity
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  reduction = TRUE
)

# Get consistency
results <- get_consistency(
  sceval,
  dissimilarity_slot = "Pseudobulk:Euclidean",
  consistency_metric = "silhouette"
)

print(results)
```

## From a Seurat Object

```{r seurat_example, message = FALSE}
library(Seurat)

# Create Seurat object with example data generated earlier
seurat_obj <- Seurat::CreateSeuratObject(
  counts = counts,
  meta.data = metadata
)

sceval_seurat <- create_scTypeEval(seurat_obj)
# Continue with standard workflow
```

## From a SingleCellExperiment Object

```{r sce_example, message = FALSE}
library(SingleCellExperiment)

# Create SCE object with example data generated earlier
sce <- SingleCellExperiment::SingleCellExperiment(
  assays = list(counts = counts),
  colData = metadata
)

sceval_sce <- create_scTypeEval(sce)
# Continue with workflow as above
```

# Common Use Cases

## Compare Multiple Dissimilarity Methods

```{r compare_methods}
# Compute different dissimilarity methods
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  reduction = TRUE
)
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Cosine",
  reduction = TRUE
)
sceval <- run_dissimilarity(
  sceval,
  method = "WasserStein",
  reduction = TRUE
)

# Compare consistency across methods
dissimilarity_methods <- c(
  "Pseudobulk:Euclidean",
  "Pseudobulk:Cosine",
  "WasserStein"
)

results_df <- get_consistency(
  sceval,
  dissimilarity_slot = dissimilarity_methods, # compute for multiple dissimilarities
  consistency_metric = "silhouette"
)

results_df
```

## Evaluate Multiple Consistency Metrics

```{r multiple_metrics}
# Compute multiple consistency metrics
consistency_metrics <- c(
  "silhouette",
  "NeighborhoodPurity",
  "Average_similarity"
)

all_metrics <- get_consistency(
  sceval,
  dissimilarity_slot = "Pseudobulk:Euclidean",
  consistency_metric = consistency_metrics
)

all_metrics
```

## Visualize Results

```{r visualize}
# Heatmap of dissimilarities
plot_heatmap(
  sceval,
  dissimilarity_slot = "Pseudobulk:Euclidean",
  sort_consistency = "silhouette"
)

# Pseudobulk PCA per sample & cell type
plot_pca(
  sceval,
  reduction_slot = "pseudobulk"
)
```

## Using Marker Genes Instead of HVGs

```{r markers}
# Identify cell type markers
sceval <- run_gene_markers(
  sceval,
  method = "scran.findMarkers",
  ngenes_celltype = 50
)

# Use markers for dissimilarity calculation
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  gene_list = "scran.findMarkers", # gene list recently added
  reduction = FALSE
)
```

## Focus on Specific Gene Sets

```{r custom_gene_sets}
# Add custom gene list
immune_genes <- c("CD3D", "CD4", "CD8A", "CD19", "CD14", "NCAM1")

sceval <- add_gene_list(
  sceval,
  gene_list = list("immune_markers" = immune_genes) # add a named list
)

# Run analysis on custom genes
sceval <- run_dissimilarity(
  sceval,
  method = "Pseudobulk:Euclidean",
  gene_list = "immune_markers" # name of the list to use
)
```

# Interpreting Results

## What Low Scores Mean

Low consistency scores may indicate:

- **Ambiguous cell type boundaries** between related types
- **Heterogeneous populations** needing refinement
- **Annotation inconsistencies** across samples

## Next Steps for Low-Scoring Cell Types

1. **Visualize** using `plot_heatmap()` or `plot_pca()` to identify problematic samples
2. **Investigate** biological differences (e.g., disease vs. healthy)
3. **Refine** annotations by splitting or merging cell types

# Available Methods and Metrics

## Dissimilarity Methods

- **Pseudobulk:Euclidean** - Euclidean distance on pseudobulk profiles
- **Pseudobulk:Cosine** - Cosine distance on pseudobulk profiles
- **Pseudobulk:Pearson** - Pearson correlation distance on pseudobulk profiles
- **WasserStein** - Wasserstein distance between cell distributions
- **recip_classif:Match** - Reciprocal classification matching
- **recip_classif:Score** - Reciprocal classification scoring

## Consistency Metrics

- **silhouette** - Standard silhouette coefficient
- **2label_silhouette** - Two-label silhouette variant
- **NeighborhoodPurity** - K-nearest neighbor purity
- **ward_PropMatch** - Ward clustering proportion match
- **Orbital_medoid** - Medoid-based orbital metric
- **Average_similarity** - Average within-group similarity

# Tips and Best Practices

1. **Always use multiple samples** (minimum 3-5 per cell type)
2. **Compare different methods** - no single method is perfect
3. **Use PCA for speed** - similar results, much faster
4. **Start with HVGs** - then try marker genes if needed
5. **Check sample sizes** - ensure adequate cells per type per sample
6. **Interpret in context** - consider biological heterogeneity

# Getting Help

- **GitHub**: https://github.com/carmonalab/scTypeEval
- **Issues**: https://github.com/carmonalab/scTypeEval/issues
- **Main vignette**: `browseVignettes("scTypeEval")`

# Session Info

```{r session_info}
sessionInfo()
```