"""
PhaseLab Enhancer Targeting: CRISPRa guide design for enhancer activation.
Implements:
- Enhancer region identification and scoring
- eQTL-based enhancer-gene linking
- CRISPRa enhancer activation prediction
- Multi-enhancer combinatorial activation
- Tissue-specific enhancer models
References:
- crisprQTL: Mapping enhancer-gene pairs via CRISPR screens
- pgBoost: Probabilistic scoring for enhancer-gene links
- Fulco et al. Activity-by-contact model for enhancer prediction
- ENCODE enhancer annotations (cCREs)
IMPORTANT: Many genes are difficult to activate through promoter-only
CRISPRa. Targeting enhancers can provide stronger or tissue-specific
activation for genes like MYC, NRXN1, SCN2A, and MECP2.
Version: 0.5.0
"""
import numpy as np
from typing import Dict, List, Optional, Tuple, Union, Any
from dataclasses import dataclass, field
import logging
from ..core.coherence import coherence_score, go_no_go
from ..core.constants import E_MINUS_2
logger = logging.getLogger(__name__)
# ENCODE cCRE (candidate Cis-Regulatory Element) types
CCRE_TYPES = {
'pELS': 'proximal enhancer-like signature', # <2kb from TSS
'dELS': 'distal enhancer-like signature', # >2kb from TSS
'PLS': 'promoter-like signature',
'CTCF-only': 'CTCF-bound only',
'DNase-H3K4me3': 'promoter-proximal',
}
# Activity-by-contact model parameters
ABC_MODEL_PARAMS = {
'activity_weight': 0.6, # Weight for enhancer activity signal
'contact_weight': 0.4, # Weight for 3D contact frequency
'distance_decay': 5000, # Distance for contact decay (bp)
'min_abc_score': 0.02, # Minimum ABC score for significance
}
[docs]
@dataclass
class Enhancer:
"""Enhancer element annotation."""
chrom: str
start: int
end: int
name: str = ""
# Type and activity
element_type: str = "dELS" # pELS, dELS, etc.
activity_score: float = 0.0 # H3K27ac/ATAC signal
# Target gene information
target_gene: Optional[str] = None
target_gene_tss: Optional[int] = None
distance_to_tss: int = 0
# Linking scores
abc_score: float = 0.0 # Activity-by-contact
eqtl_support: bool = False # Has eQTL evidence
hi_c_contact: float = 0.0 # Hi-C contact frequency
# Tissue specificity
tissue: str = "generic"
tissue_specific: bool = False
@property
def center(self) -> int:
return (self.start + self.end) // 2
@property
def length(self) -> int:
return self.end - self.start
@property
def is_proximal(self) -> bool:
return abs(self.distance_to_tss) < 2000
[docs]
@dataclass
class EnhancerGuideResult:
"""Result from enhancer-targeting guide design."""
guide_sequence: str
guide_position: int
enhancer: Enhancer
# Scores
activation_potential: float
guide_quality: float
combined_score: float
# Coherence
coherence_R: float
go_no_go: str
# Recommendations
recommendation: str
[docs]
@dataclass
class EnhancerConfig:
"""Configuration for enhancer targeting."""
# Enhancer definition
min_enhancer_length: int = 200
max_enhancer_distance: int = 1_000_000 # 1Mb max
# Activity thresholds
min_activity_score: float = 0.3
min_abc_score: float = 0.02
# Guide design
guides_per_enhancer: int = 3
prefer_enhancer_center: bool = True
# Tissue specificity
require_tissue_match: bool = False
target_tissue: str = "generic"
def calculate_abc_score(
activity: float,
contact: float,
distance: int,
config: Optional[Dict] = None,
) -> float:
"""
Calculate Activity-by-Contact (ABC) score for enhancer-gene link.
ABC model: Score = Activity × Contact / Σ(Activity × Contact)
Based on Fulco et al. (2019) - predicts enhancer-gene pairs.
Args:
activity: Enhancer activity signal (H3K27ac, ATAC)
contact: 3D contact frequency (Hi-C, if available)
distance: Distance to target gene TSS
config: ABC model parameters
Returns:
ABC score (higher = stronger link)
"""
if config is None:
config = ABC_MODEL_PARAMS
# If no contact data, estimate from distance
if contact <= 0:
# Power-law decay with distance
contact = 1.0 / (1.0 + (distance / config['distance_decay']) ** 1.5)
# ABC score (not normalized - would need all enhancers for normalization)
abc = activity * contact
return float(abc)
def estimate_enhancer_activity(
h3k27ac_signal: float = 0.0,
atac_signal: float = 0.0,
dnase_signal: float = 0.0,
) -> float:
"""
Estimate enhancer activity from epigenetic signals.
Args:
h3k27ac_signal: H3K27ac ChIP-seq signal (active mark)
atac_signal: ATAC-seq signal
dnase_signal: DNase-seq signal
Returns:
Activity score 0-1
"""
signals = []
# H3K27ac is the strongest indicator of active enhancers
if h3k27ac_signal > 0:
signals.append(h3k27ac_signal * 1.2) # Higher weight
if atac_signal > 0:
signals.append(atac_signal)
if dnase_signal > 0:
signals.append(dnase_signal * 0.8)
if not signals:
return 0.0
# Combine signals (weighted average, capped at 1.0)
activity = np.mean(signals)
return float(min(1.0, activity))
[docs]
def score_enhancer_for_activation(
enhancer: Enhancer,
config: Optional[EnhancerConfig] = None,
) -> Dict[str, float]:
"""
Score an enhancer's potential for CRISPRa activation.
Considers:
- Enhancer activity (already active enhancers harder to boost)
- Distance to target gene
- ABC score (link confidence)
- Tissue specificity
Args:
enhancer: Enhancer to score
config: EnhancerConfig
Returns:
Scoring dictionary
"""
if config is None:
config = EnhancerConfig()
scores = {}
# Activity-based potential
# Moderately active enhancers have highest activation potential
# Very active = already maxed, inactive = hard to activate
if enhancer.activity_score < 0.2:
activity_potential = 0.4 # Weak enhancer, harder to activate
elif enhancer.activity_score < 0.5:
activity_potential = 0.9 # Moderate, good potential
elif enhancer.activity_score < 0.8:
activity_potential = 0.7 # Already active, some headroom
else:
activity_potential = 0.3 # Near maximum, little room to boost
scores['activity_potential'] = activity_potential
# Distance factor
# Proximal enhancers easier to link to target gene
distance = abs(enhancer.distance_to_tss)
if distance < 2000:
distance_factor = 1.0
elif distance < 10000:
distance_factor = 0.9
elif distance < 100000:
distance_factor = 0.7
else:
distance_factor = 0.5
scores['distance_factor'] = distance_factor
# ABC score contribution
if enhancer.abc_score >= config.min_abc_score:
abc_factor = min(1.0, enhancer.abc_score / 0.1) # Normalize
else:
abc_factor = 0.3 # Below threshold, uncertain link
scores['abc_factor'] = abc_factor
# eQTL evidence boost
if enhancer.eqtl_support:
scores['eqtl_boost'] = 1.2
else:
scores['eqtl_boost'] = 1.0
# Combined score
combined = (
0.35 * activity_potential +
0.25 * distance_factor +
0.25 * abc_factor +
0.15 * (1.0 if enhancer.eqtl_support else 0.5)
) * scores['eqtl_boost']
scores['combined_potential'] = float(min(1.0, combined))
return scores
[docs]
def identify_target_enhancers(
gene_symbol: str,
gene_tss: int,
gene_chrom: str,
candidate_enhancers: List[Enhancer],
config: Optional[EnhancerConfig] = None,
) -> List[Tuple[Enhancer, float]]:
"""
Identify enhancers likely to regulate a target gene.
Args:
gene_symbol: Target gene symbol
gene_tss: Gene TSS position
gene_chrom: Gene chromosome
candidate_enhancers: List of candidate enhancers
config: EnhancerConfig
Returns:
List of (enhancer, link_score) tuples, sorted by score
"""
if config is None:
config = EnhancerConfig()
results = []
for enh in candidate_enhancers:
# Check chromosome match
if enh.chrom != gene_chrom:
continue
# Calculate distance
distance = abs(enh.center - gene_tss)
# Check maximum distance
if distance > config.max_enhancer_distance:
continue
# Update enhancer with distance
enh.distance_to_tss = distance
enh.target_gene = gene_symbol
enh.target_gene_tss = gene_tss
# Calculate ABC score if not already set
if enh.abc_score <= 0:
enh.abc_score = calculate_abc_score(
activity=enh.activity_score,
contact=enh.hi_c_contact,
distance=distance,
)
# Score for CRISPRa potential
scores = score_enhancer_for_activation(enh, config)
link_score = scores['combined_potential']
if link_score >= 0.3: # Minimum threshold
results.append((enh, link_score))
# Sort by link score
results.sort(key=lambda x: x[1], reverse=True)
return results
[docs]
def design_enhancer_guides(
enhancer: Enhancer,
sequence: str,
enhancer_start_in_seq: int,
config: Optional[EnhancerConfig] = None,
guide_length: int = 20,
pam: str = "NGG",
) -> List[EnhancerGuideResult]:
"""
Design CRISPRa guides targeting an enhancer.
For enhancer activation, guides should target:
- Near the enhancer center (peak of activity)
- Accessible regions within enhancer
- Avoid repressive elements
Args:
enhancer: Target enhancer
sequence: DNA sequence containing enhancer
enhancer_start_in_seq: Position of enhancer start in sequence
config: EnhancerConfig
guide_length: Guide length (default 20)
pam: PAM sequence
Returns:
List of EnhancerGuideResult objects
"""
if config is None:
config = EnhancerConfig()
# Import here to avoid circular imports
from .pam_scan import find_pam_sites
from .scoring import gc_content, delta_g_santalucia, mit_specificity_score
sequence = sequence.upper()
results = []
# Define search region within enhancer
enh_start = enhancer_start_in_seq
enh_end = enh_start + enhancer.length
# Find PAM sites within enhancer
all_hits = find_pam_sites(
sequence,
pam=pam,
guide_length=guide_length,
both_strands=True,
)
# Filter to enhancer region
enhancer_hits = [
h for h in all_hits
if enh_start <= h.guide_start < enh_end
]
if not enhancer_hits:
logger.warning(f"No guides found in enhancer {enhancer.name}")
return results
# Score enhancer potential
enh_scores = score_enhancer_for_activation(enhancer, config)
# Score each guide
for hit in enhancer_hits:
guide_seq = hit.guide
guide_pos = hit.guide_start
# Basic guide quality
gc = gc_content(guide_seq)
if gc < 0.35 or gc > 0.75:
continue # Skip bad GC
delta_g = delta_g_santalucia(guide_seq)
mit = mit_specificity_score(guide_seq)
# Position within enhancer (prefer center)
enh_center = enh_start + enhancer.length // 2
distance_from_center = abs(guide_pos - enh_center)
center_factor = 1.0 - min(1.0, distance_from_center / (enhancer.length / 2))
# Guide quality score
guide_quality = (
0.3 * (1.0 if 0.45 <= gc <= 0.65 else 0.6) +
0.3 * min(1.0, -delta_g / 25.0) +
0.2 * (mit / 100.0) +
0.2 * center_factor
)
# Activation potential (enhancer × guide quality)
activation_potential = enh_scores['combined_potential'] * guide_quality
# IR coherence (simplified)
R_bar = _compute_guide_coherence(guide_seq)
status = go_no_go(R_bar)
# Combined score
combined = (
0.4 * activation_potential +
0.3 * guide_quality +
0.3 * R_bar
)
# Recommendation
if status == "NO-GO":
recommendation = "LOW COHERENCE: Guide prediction unreliable"
elif activation_potential >= 0.6:
recommendation = f"EXCELLENT: High activation potential ({activation_potential:.2f})"
elif activation_potential >= 0.4:
recommendation = f"GOOD: Moderate activation potential ({activation_potential:.2f})"
else:
recommendation = f"MARGINAL: Low activation potential ({activation_potential:.2f})"
result = EnhancerGuideResult(
guide_sequence=guide_seq,
guide_position=guide_pos,
enhancer=enhancer,
activation_potential=activation_potential,
guide_quality=guide_quality,
combined_score=combined,
coherence_R=R_bar,
go_no_go=status,
recommendation=recommendation,
)
results.append(result)
# Sort by combined score
results.sort(key=lambda r: r.combined_score, reverse=True)
# Return top guides
return results[:config.guides_per_enhancer]
def _compute_guide_coherence(guide_seq: str) -> float:
"""Compute IR coherence using ATLAS-Q enhanced backend (v0.6.0+)."""
from .coherence_utils import compute_guide_coherence
return compute_guide_coherence(guide_seq, use_atlas_q=True)
[docs]
def predict_enhancer_activation_effect(
enhancer: Enhancer,
guide_quality: float,
baseline_expression: float = 1.0,
) -> Dict[str, float]:
"""
Predict effect of enhancer CRISPRa on target gene expression.
Args:
enhancer: Target enhancer
guide_quality: Quality score of guide used
baseline_expression: Baseline gene expression level
Returns:
Prediction dictionary with fold-change estimates
"""
# Enhancer activation typically gives 1.5-5x fold change
# depending on enhancer strength and gene responsiveness
# Base fold change from enhancer activity
base_fold = 1.5 + 2.5 * enhancer.abc_score
# Adjust for guide quality
fold_change = base_fold * (0.5 + 0.5 * guide_quality)
# Distance penalty
distance_penalty = 1.0 - 0.3 * min(1.0, enhancer.distance_to_tss / 500000)
fold_change *= distance_penalty
# Confidence based on evidence
if enhancer.eqtl_support:
confidence = 0.8
elif enhancer.abc_score >= 0.05:
confidence = 0.6
else:
confidence = 0.4
return {
'predicted_fold_change': float(fold_change),
'fold_change_low': float(fold_change * 0.6),
'fold_change_high': float(fold_change * 1.5),
'confidence': confidence,
'baseline_expression': baseline_expression,
'predicted_expression': baseline_expression * fold_change,
}
# Pre-defined enhancer databases for common therapeutic targets
KNOWN_ENHANCERS = {
'RAI1': [
{
'name': 'RAI1_enh1',
'chrom': 'chr17',
'start': 17679000,
'end': 17680000,
'distance_to_tss': -2500,
'activity_score': 0.65,
'tissue': 'brain',
},
],
'SCN2A': [
{
'name': 'SCN2A_enh1',
'chrom': 'chr2',
'start': 165235000,
'end': 165236500,
'distance_to_tss': -4400,
'activity_score': 0.72,
'tissue': 'brain',
},
],
'MECP2': [
{
'name': 'MECP2_enh1',
'chrom': 'chrX',
'start': 154025000,
'end': 154026000,
'distance_to_tss': -15000,
'activity_score': 0.58,
'tissue': 'brain',
},
],
}
[docs]
def get_known_enhancers(gene: str) -> List[Enhancer]:
"""
Get known enhancers for a gene from built-in database.
Args:
gene: Gene symbol
Returns:
List of Enhancer objects
"""
gene = gene.upper()
if gene not in KNOWN_ENHANCERS:
return []
enhancers = []
for enh_data in KNOWN_ENHANCERS[gene]:
enh = Enhancer(
chrom=enh_data['chrom'],
start=enh_data['start'],
end=enh_data['end'],
name=enh_data['name'],
activity_score=enh_data.get('activity_score', 0.5),
distance_to_tss=enh_data.get('distance_to_tss', 0),
tissue=enh_data.get('tissue', 'generic'),
target_gene=gene,
)
enhancers.append(enh)
return enhancers