<< All versions
Skill v1.0.1
currentLLM-judged scan100/100majiayu000/claude-skill-registry-data/bio-atac-seq-differential-accessibility
3 files
──Details
PublishedMay 23, 2026 at 09:16 AM
Content Hashsha256:c254f58442d7170f...
Git SHA56773ea772f3
Bump Typepatch
──Files
Files (1 file, 5.4 KB)
SKILL.md5.4 KBactive
SKILL.md · 239 lines · 5.4 KB
version: "1.0.1" name: bio-atac-seq-differential-accessibility description: Find differentially accessible chromatin regions between conditions using DiffBind or DESeq2. Use when comparing chromatin accessibility between treatment groups, cell types, or developmental stages in ATAC-seq experiments. tool_type: r primary_tool: DiffBind
Differential Accessibility
DiffBind Workflow
r
library(DiffBind)# 1. Create sample sheetsamples <- data.frame(SampleID = c('ctrl_1', 'ctrl_2', 'treat_1', 'treat_2'),Condition = c('control', 'control', 'treated', 'treated'),Replicate = c(1, 2, 1, 2),bamReads = c('ctrl_1.bam', 'ctrl_2.bam', 'treat_1.bam', 'treat_2.bam'),Peaks = c('ctrl_1.narrowPeak', 'ctrl_2.narrowPeak', 'treat_1.narrowPeak', 'treat_2.narrowPeak'))write.csv(samples, 'samples.csv', row.names=FALSE)# 2. Load datadba <- dba(sampleSheet='samples.csv')# 3. Count readsdba <- dba.count(dba)# 4. Normalize (required in DiffBind 3.0+)dba <- dba.normalize(dba)# 5. Set up contrastsdba <- dba.contrast(dba, contrast=c('Condition', 'treated', 'control'))# 6. Differential analysisdba <- dba.analyze(dba)# 7. Get resultsresults <- dba.report(dba)
DiffBind with Consensus Peaks
r
library(DiffBind)# Load samplesdba <- dba(sampleSheet='samples.csv')# Count with specific parametersdba <- dba.count(dba,summits=250, # Re-center peaks on summitminOverlap=2, # Peak in at least 2 samplesscore=DBA_SCORE_NORMALIZED)# Normalizedba <- dba.normalize(dba, normalize=DBA_NORM_NATIVE)# Analyzedba <- dba.contrast(dba, contrast=c('Condition', 'treated', 'control'))dba <- dba.analyze(dba, method=DBA_DESEQ2)# Extract resultsresults <- dba.report(dba, th=0.05, bCounts=TRUE)# Savewrite.csv(as.data.frame(results), 'differential_peaks.csv')
DiffBind Visualizations
r
# PCA plotdba.plotPCA(dba, attributes=DBA_CONDITION)# MA plotdba.plotMA(dba)# Volcano plotdba.plotVolcano(dba)# Heatmap of differential peaksdba.plotHeatmap(dba, contrast=1, correlations=FALSE)# Venn diagram of overlapping peaksdba.plotVenn(dba, contrast=1, bDB=TRUE, bGain=TRUE, bLoss=TRUE)
Using DESeq2 Directly
r
library(DESeq2)library(GenomicRanges)# Load peak counts (from featureCounts or custom counting)counts <- read.delim('peak_counts.txt', row.names=1)# Sample metadatacoldata <- data.frame(row.names = colnames(counts),condition = factor(c('control', 'control', 'treated', 'treated')))# Create DESeq objectdds <- DESeqDataSetFromMatrix(countData=counts, colData=coldata, design=~condition)# Filter low countsdds <- dds[rowSums(counts(dds)) >= 10, ]# Run DESeq2dds <- DESeq(dds)# Resultsres <- results(dds, contrast=c('condition', 'treated', 'control'))res <- res[order(res$padj), ]# Significant peakssig <- subset(res, padj < 0.05 & abs(log2FoldChange) > 1)
Count Reads in Peaks
bash
# Using featureCounts# First convert peaks to SAF formatawk 'BEGIN{OFS="\t"; print "GeneID\tChr\tStart\tEnd\tStrand"}{print $1"_"$2"_"$3, $1, $2, $3, "."}' consensus_peaks.bed > peaks.saffeatureCounts \-a peaks.saf \-F SAF \-o peak_counts.txt \-p \--countReadPairs \-T 8 \*.bam
Python Alternative
python
import pandas as pdimport numpy as npfrom scipy import statsdef simple_differential(counts_file, groups):'''Simple differential accessibility test.'''counts = pd.read_csv(counts_file, sep='\t', index_col=0, comment='#')# Normalize to CPMcpm = counts.div(counts.sum()) * 1e6# Log transformlog_cpm = np.log2(cpm + 1)# Separate groupsgroup1 = [c for c in counts.columns if groups[c] == 'control']group2 = [c for c in counts.columns if groups[c] == 'treated']results = []for peak in counts.index:g1_vals = log_cpm.loc[peak, group1]g2_vals = log_cpm.loc[peak, group2]log2fc = g2_vals.mean() - g1_vals.mean()t_stat, pval = stats.ttest_ind(g1_vals, g2_vals)results.append({'peak': peak,'log2FoldChange': log2fc,'pvalue': pval})df = pd.DataFrame(results)df['padj'] = stats.false_discovery_control(df['pvalue'])return df
Annotate Differential Peaks
r
library(ChIPseeker)library(TxDb.Hsapiens.UCSC.hg38.knownGene)# Annotate differential peaksdiff_peaks <- dba.report(dba)peakAnno <- annotatePeak(diff_peaks, TxDb=TxDb.Hsapiens.UCSC.hg38.knownGene)# Plot annotationplotAnnoPie(peakAnno)plotDistToTSS(peakAnno)# Get genesgenes <- as.data.frame(peakAnno)$geneId
Filter Results
r
# Get significant resultssig_peaks <- dba.report(dba, th=0.05, fold=1)# Opened in treatmentopened <- sig_peaks[sig_peaks$Fold > 0]# Closed in treatmentclosed <- sig_peaks[sig_peaks$Fold < 0]# Export as BEDexport.bed(opened, 'opened_peaks.bed')export.bed(closed, 'closed_peaks.bed')
Multi-factor Designs
r
# Complex design with batch correctionsamples$Batch <- factor(c('A', 'B', 'A', 'B'))dba <- dba(sampleSheet=samples)dba <- dba.count(dba)dba <- dba.normalize(dba)# Design formula approachdba <- dba.contrast(dba, design='~Batch + Condition')dba <- dba.analyze(dba)
Related Skills
- atac-seq/atac-peak-calling - Generate input peaks
- differential-expression/deseq2-basics - DESeq2 methods
- chip-seq/differential-binding - Similar DiffBind workflow
- pathway-analysis/go-enrichment - Analyze differential genes