Source code for atlas_q.tdvp

"""
Time-Dependent Variational Principle (TDVP) for MPS

Implements efficient time evolution for Hamiltonian dynamics:
- 1-site TDVP (conserves bond dimension)
- 2-site TDVP (allows bond dimension growth)
- Adaptive time-stepping
- Mixed precision support

Applications:
- Quantum quenches
- Real-time dynamics
- Transport phenomena
- Correlation functions

References:
- Haegeman et al. (2011): "Time-Dependent Variational Principle for Quantum Lattices"
- Haegeman et al. (2016): "Unifying time evolution and optimization with matrix product states"

Author: ATLAS-Q Contributors
Date: October 2025
License: MIT
"""

from dataclasses import dataclass
from typing import List, Optional, Tuple

import numpy as np
import torch
from scipy.linalg import expm

from .adaptive_mps import AdaptiveMPS
from .linalg_robust import robust_svd
from .mpo_ops import MPO, expectation_value

# GPU-optimized operations (if available)
try:
    from triton_kernels.tdvp_mpo_ops import (
        tdvp_apply_local_H_optimized,
        tdvp_left_environment_init_optimized,
        tdvp_right_environment_init_optimized,
    )
    GPU_OPTIMIZED_AVAILABLE = True
except ImportError:
    GPU_OPTIMIZED_AVAILABLE = False


@dataclass
class TDVPConfig:
    """Configuration for TDVP simulation"""

    dt: float = 0.01                # Time step
    t_final: float = 10.0           # Final time
    order: int = 2                  # 1-site or 2-site TDVP
    chi_max: int = 128              # Maximum bond dimension
    eps_bond: float = 1e-8          # Truncation tolerance
    adaptive_dt: bool = False       # Adaptive time-stepping
    dt_min: float = 1e-5            # Minimum time step (adaptive)
    dt_max: float = 0.1             # Maximum time step (adaptive)
    error_tol: float = 1e-6         # Error tolerance (adaptive)
    normalize: bool = True          # Normalize MPS after each time step
    krylov_dim: int = 10            # Krylov subspace dimension for matrix exponential
    use_gpu_optimized: bool = True  # Use GPU-optimized contractions (torch.compile)
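
# Example (illustrative): constructing a config for a short, accuracy-focused
# run. The values below are arbitrary; tune dt and chi_max for your model.
#
#     >>> config = TDVPConfig(dt=0.005, t_final=2.0, order=2, chi_max=256)
#     >>> config.order
#     2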
class TDVP1Site:
    """
    1-site TDVP: conserves bond dimension

    Advantages:
    - Fast
    - No bond dimension growth
    - Stable

    Disadvantages:
    - Less accurate for long times
    - Cannot capture entanglement growth
    """
    def __init__(self, hamiltonian: MPO, mps: AdaptiveMPS, config: TDVPConfig):
        self.H = hamiltonian
        self.mps = mps
        self.config = config

        assert self.H.n_sites == self.mps.num_qubits
        self.n_sites = self.mps.num_qubits

        # Canonical gauge for stable TDVP
        self.mps.to_left_canonical()

        # Precompute left and right environments
        self.left_envs = self._init_left_environments()
        self.right_envs = self._init_right_environments()

        # Sanity check: environment bonds must match MPS bonds
        n = self.mps.num_qubits
        for i in range(n):
            aL_i = self.mps.tensors[i].shape[0]
            aR_i = self.mps.tensors[i].shape[2]
            assert (
                self.left_envs[i].shape[2] == aL_i
            ), f"L[{i}] right-bond {self.left_envs[i].shape[2]} != aL[{i}] {aL_i}"
            assert (
                self.right_envs[i + 1].shape[0] == aR_i
            ), f"R[{i+1}] left-bond {self.right_envs[i+1].shape[0]} != aR[{i}] {aR_i}"
    def _init_left_environments(self) -> List[torch.Tensor]:
        """
        Initialize left environment tensors.

        L[0] = identity (nothing to the left)
        L[i+1] built from site i (for i = 0..n-1)
        Total size: n+1 environments

        Convention: L[k] has shape [bra_L, mpo_L, ket_L]
        """
        n = self.mps.num_qubits
        device = self.mps.tensors[0].device
        dtype = self.mps.tensors[0].dtype

        L = [torch.ones(1, 1, 1, dtype=dtype, device=device)]

        # Use GPU-optimized version if available and enabled
        use_optimized = (
            GPU_OPTIMIZED_AVAILABLE
            and self.config.use_gpu_optimized
            and device.type == "cuda"
        )

        for i in range(n):
            A = self.mps.tensors[i]  # [i, s, j]
            W = self.H.tensors[i].to(device=device, dtype=dtype)  # [l, s, t, n]

            if use_optimized:
                # GPU-optimized contraction (torch.compile + optimized order)
                L_next = tdvp_left_environment_init_optimized(L[i], A, W)
            else:
                # Standard einsum
                Ac = A.conj()  # [q, t, u]
                L_next = torch.einsum("qli, qtu, lstn, isj -> unj", L[i], Ac, W, A)

            L.append(L_next)

        return L

    def _init_right_environments(self) -> List[torch.Tensor]:
        """
        Initialize right environment tensors.

        R[n] = identity (nothing to the right)
        R[i] built from site i (for i = n-1..0, going backward)
        Total size: n+1 environments (indexed 0..n)

        Convention: R[k] has shape [bra_R, mpo_R, ket_R]
        """
        n = self.mps.num_qubits
        device = self.mps.tensors[0].device
        dtype = self.mps.tensors[0].dtype

        # Pre-allocate list with n+1 slots
        R = [None] * (n + 1)
        R[n] = torch.ones(1, 1, 1, dtype=dtype, device=device)

        # Use GPU-optimized version if available and enabled
        use_optimized = (
            GPU_OPTIMIZED_AVAILABLE
            and self.config.use_gpu_optimized
            and device.type == "cuda"
        )

        for i in range(n - 1, -1, -1):
            A = self.mps.tensors[i]  # [i, s, j]
            W = self.H.tensors[i].to(device=device, dtype=dtype)  # [l, s, t, n]

            if use_optimized:
                # GPU-optimized contraction (torch.compile + optimized order)
                R[i] = tdvp_right_environment_init_optimized(R[i + 1], A, W)
            else:
                # Standard einsum
                Ac = A.conj()  # [q, t, u]
                R[i] = torch.einsum("unj, qtu, lstn, isj -> qli", R[i + 1], Ac, W, A)

        return R

    def _apply_local_H(self, site: int, A: torch.Tensor) -> torch.Tensor:
        """
        Apply effective Hamiltonian to site tensor.

        Convention: L[site] [q,l,i], W[site] [l,s,t,n], A [i,s,j], R[site+1] [u,n,j]
        Output: H_A[q,t,u] (same shape as A, since bra and ket bond dimensions match)
        """
        Ls = self.left_envs[site]
        Rs = self.right_envs[site + 1]
        W = self.H.tensors[site].to(device=A.device, dtype=A.dtype)

        # Use GPU-optimized version if available and enabled
        use_optimized = (
            GPU_OPTIMIZED_AVAILABLE
            and self.config.use_gpu_optimized
            and A.device.type == "cuda"
        )

        if use_optimized:
            # GPU-optimized contraction
            return tdvp_apply_local_H_optimized(Ls, W, A, Rs)
        else:
            # Standard einsum: contract the ket bonds (i, j) of the
            # environments; the bra bonds (q, u) stay open as the output bonds
            return torch.einsum("qli, lstn, isj, unj -> qtu", Ls, W, A, Rs)
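
    # Shape sanity sketch (illustrative, not part of the class): with the
    # conventions above, "qli,lstn,isj,unj->qtu" returns a tensor matching
    # A's shape whenever bra and ket bond dimensions agree. The dimensions
    # below are arbitrary:
    #
    #     >>> import torch
    #     >>> L = torch.ones(4, 3, 4); W = torch.ones(3, 2, 2, 3)
    #     >>> A = torch.ones(4, 2, 5); R = torch.ones(5, 3, 5)
    #     >>> torch.einsum("qli,lstn,isj,unj->qtu", L, W, A, R).shape
    #     torch.Size([4, 2, 5])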
    def sweep_forward(self, dt: float):
        """Forward sweep: evolve sites 0 → n-1"""
        n = self.mps.num_qubits

        for site in range(n):
            A = self.mps.tensors[site]

            # Evolve: A(t+dt) = exp(-i H_eff dt) A(t)
            # Use Krylov exponentiation for efficiency
            A_new = self._expm_multiply(-1j * dt, A, site)
            self.mps.tensors[site] = A_new

            # Update left environment (same as initialization)
            if site < n - 1:
                W = self.H.tensors[site].to(device=A_new.device, dtype=A_new.dtype)
                Ac = A_new.conj()
                self.left_envs[site + 1] = torch.einsum(
                    "qli, qtu, lstn, isj -> unj", self.left_envs[site], Ac, W, A_new
                )
    def sweep_backward(self, dt: float):
        """Backward sweep: evolve sites n-1 → 0"""
        n = self.mps.num_qubits

        for site in range(n - 1, -1, -1):
            A = self.mps.tensors[site]

            A_new = self._expm_multiply(-1j * dt, A, site)
            self.mps.tensors[site] = A_new

            # Update right environment (same as initialization)
            if site > 0:
                W = self.H.tensors[site].to(device=A_new.device, dtype=A_new.dtype)
                Ac = A_new.conj()
                self.right_envs[site] = torch.einsum(
                    "unj, qtu, lstn, isj -> qli", self.right_envs[site + 1], Ac, W, A_new
                )
    def _expm_multiply(
        self, factor: complex, A: torch.Tensor, site: int, max_iter: int = 30, tol: float = 1e-10
    ) -> torch.Tensor:
        """
        Compute exp(factor * H_eff) |A⟩ using a Krylov subspace method

        Args:
            factor: Multiplication factor (typically -i*dt)
            A: Input tensor
            site: Site index
            max_iter: Maximum Krylov iterations
            tol: Convergence tolerance

        Returns:
            exp(factor * H_eff) |A⟩
        """
        # Flatten A for Krylov iteration
        shape = A.shape
        v = A.reshape(-1)
        v = v / torch.norm(v)

        # Arnoldi iteration to build Krylov basis; square allocation so the
        # projected matrix stays square even when no breakdown occurs
        V = [v]
        H_krylov = torch.zeros(max_iter + 1, max_iter + 1, dtype=A.dtype, device=A.device)

        for j in range(max_iter):
            # Apply H to current vector
            v_tensor = V[j].reshape(shape)
            w_tensor = self._apply_local_H(site, v_tensor)
            w = w_tensor.reshape(-1)

            # Gram-Schmidt orthogonalization: h_ij = <V[i], w>
            for i in range(len(V)):
                H_krylov[i, j] = torch.vdot(V[i], w)
                w = w - H_krylov[i, j] * V[i]

            beta = torch.norm(w)
            H_krylov[j + 1, j] = beta

            if beta < tol:
                break

            V.append(w / beta)

        # Compute exp(factor * H_krylov) e_1 using the TRUE matrix exponential
        m = len(V)
        H_small_square = H_krylov[:m, :m].cpu().numpy()

        # Initial vector in Krylov space, scaled by the norm of A
        e1 = np.zeros(m, dtype=np.complex128)
        e1[0] = torch.norm(A.reshape(-1)).item()

        # Apply matrix exponential (not element-wise!)
        y = expm(complex(factor) * H_small_square) @ e1

        # Reconstruct result in original space
        result = torch.zeros_like(A.reshape(-1), dtype=A.dtype, device=A.device)
        for i in range(m):
            result += torch.as_tensor(y[i], dtype=A.dtype, device=A.device) * V[i]

        return result.reshape(shape)
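
    # Krylov recap (illustrative): the loop above builds an orthonormal basis
    # V_m of span{v, Hv, ..., H^(m-1) v} and the projected matrix T_m, then
    # approximates exp(f*H)|A> ≈ ||A|| * V_m @ expm(f*T_m) @ e_1. A quick
    # sanity check of the underlying dense identity:
    #
    #     >>> import numpy as np; from scipy.linalg import expm
    #     >>> H = np.diag([1.0, 2.0]); v = np.array([1.0, 0.0])
    #     >>> np.allclose(expm(-0.1j * H) @ v, [np.exp(-0.1j), 0.0])
    #     True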
    def step(self, dt: float):
        """
        Perform one TDVP time step using symmetric Trotter splitting.

        Args:
            dt: Time step size
        """
        self.sweep_forward(dt / 2)
        self.sweep_backward(dt / 2)
    def run(self) -> Tuple[List[float], List[complex]]:
        """
        Run TDVP time evolution

        Returns:
            times: List of time points
            energies: List of energy expectation values
        """
        times = []
        energies = []

        t = 0.0
        dt = self.config.dt

        # Record initial state at t=0
        energy = expectation_value(self.H, self.mps)
        times.append(t)
        energies.append(energy)

        while t < self.config.t_final:
            # Symmetric Trotter splitting: dt/2 forward + dt/2 backward
            self.step(dt)
            t += dt

            # Compute energy
            energy = expectation_value(self.H, self.mps)
            times.append(t)
            energies.append(energy)

            # Energy drift check
            if len(energies) > 1:
                dE = abs(energy - energies[-2])
                if dE > 0.01:  # Warning threshold
                    print(f"Warning: Large energy change dE={dE:.2e} at t={t:.3f}")

            if len(times) % 10 == 0:
                print(f"t = {t:.3f}, E = {energy.real:.6f}")

        return times, energies
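
# Usage sketch (hedged): driving TDVP1Site directly. `make_heisenberg_mpo`
# and the AdaptiveMPS call below are hypothetical placeholders for whatever
# builders the package provides; only the TDVP1Site/TDVPConfig API is taken
# from this module.
#
#     H = make_heisenberg_mpo(n_sites=16)       # hypothetical builder
#     psi = AdaptiveMPS(num_qubits=16)          # actual constructor may differ
#     tdvp = TDVP1Site(H, psi, TDVPConfig(dt=0.01, t_final=1.0, order=1))
#     times, energies = tdvp.run()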
class TDVP2Site:
    """
    2-site TDVP: allows bond dimension growth

    Advantages:
    - More accurate
    - Captures entanglement growth
    - Adaptive truncation

    Disadvantages:
    - Slower than 1-site
    - Needs careful truncation management
    """
    def __init__(self, hamiltonian: MPO, mps: AdaptiveMPS, config: TDVPConfig):
        self.H = hamiltonian
        self.mps = mps
        self.config = config
        self.n_sites = self.mps.num_qubits

        # Initialize environments
        self.left_envs = []
        self.right_envs = []
    def step(self, dt: float):
        """
        Perform one 2-site TDVP time step.

        Args:
            dt: Time step size
        """
        # Sweep right: evolve two-site tensors with SVD
        for i in range(self.mps.num_qubits - 1):
            self._evolve_two_site(i, dt / 2)

        # Sweep left
        for i in range(self.mps.num_qubits - 2, -1, -1):
            self._evolve_two_site(i, dt / 2)
    def run(self) -> Tuple[List[float], List[complex]]:
        """Run 2-site TDVP evolution"""
        times = []
        energies = []

        t = 0.0
        dt = self.config.dt

        # Record initial state at t=0
        energy = expectation_value(self.H, self.mps)
        times.append(t)
        energies.append(energy)

        while t < self.config.t_final:
            # Use step() method for consistency
            self.step(dt)
            t += dt

            energy = expectation_value(self.H, self.mps)
            times.append(t)
            energies.append(energy)

            if len(times) % 10 == 0:
                print(
                    f"t = {t:.3f}, E = {energy.real:.6f}, "
                    f"max_χ = {self.mps.stats_summary()['max_chi']}"
                )

        return times, energies
    def _apply_two_site_H(self, site: int, Theta: torch.Tensor) -> torch.Tensor:
        """
        Apply effective two-site Hamiltonian to two-site tensor.

        Args:
            site: Left site index
            Theta: Two-site tensor [i, s1, s2, j]

        Returns:
            H_eff * Theta with same shape as Theta
        """
        # Get MPO tensors for the two sites
        W1 = self.H.tensors[site].to(device=Theta.device, dtype=Theta.dtype)  # [l, s1, t1, m]
        W2 = self.H.tensors[site + 1].to(device=Theta.device, dtype=Theta.dtype)  # [m, s2, t2, r]

        # Merge MPO tensors over the shared middle bond:
        # W1[l,s1,t1,m] * W2[m,s2,t2,r] -> W_two[l,s1,t1,s2,t2,r]
        W_two = torch.einsum("labm,mcdr->labcdr", W1, W2)

        # Trivial (identity) environments for now; a full implementation
        # would cache and reuse left/right environments as in TDVP1Site
        left_env = torch.ones(1, 1, 1, dtype=Theta.dtype, device=Theta.device)   # [bra_L, mpo_L, ket_L]
        right_env = torch.ones(1, 1, 1, dtype=Theta.dtype, device=Theta.device)  # [bra_R, mpo_R, ket_R]

        # Apply effective Hamiltonian: H_eff * Theta
        # left_env [q,l,i], W_two [l,a,b,c,d,r], Theta [i,a,c,j], right_env [u,r,j]
        # Result: H_Theta [i,b,d,j] where b=t1, d=t2
        H_Theta = torch.einsum(
            "qli,labcdr,iacj,urj->ibdj", left_env, W_two, Theta, right_env
        )

        return H_Theta

    def _expm_multiply_two_site(
        self, factor: complex, Theta: torch.Tensor, site: int, max_iter: int = 30, tol: float = 1e-10
    ) -> torch.Tensor:
        """
        Compute exp(factor * H_eff) |Theta⟩ using a Krylov subspace method

        Args:
            factor: Multiplication factor (typically -i*dt)
            Theta: Input two-site tensor
            site: Left site index
            max_iter: Maximum Krylov iterations
            tol: Convergence tolerance

        Returns:
            exp(factor * H_eff) |Theta⟩
        """
        # Flatten Theta for Krylov iteration
        shape = Theta.shape
        v = Theta.reshape(-1)
        v = v / torch.norm(v)

        # Arnoldi iteration to build Krylov basis
        V = [v]
        H_krylov = torch.zeros(max_iter + 1, max_iter + 1, dtype=Theta.dtype, device=Theta.device)

        for j in range(max_iter):
            # Apply H to current vector
            v_tensor = V[j].reshape(shape)
            w_tensor = self._apply_two_site_H(site, v_tensor)
            w = w_tensor.reshape(-1)

            # Gram-Schmidt orthogonalization: h_ij = <V[i], w>
            for i in range(len(V)):
                H_krylov[i, j] = torch.vdot(V[i], w)
                w = w - H_krylov[i, j] * V[i]

            beta = torch.norm(w)
            H_krylov[j + 1, j] = beta

            if beta < tol:
                break

            V.append(w / beta)

        # Compute exp(factor * H_krylov) e_1 using matrix exponential
        m = len(V)
        H_small_square = H_krylov[:m, :m].cpu().numpy()

        # Initial vector in Krylov space, scaled by the norm of Theta
        e1 = np.zeros(m, dtype=np.complex128)
        e1[0] = torch.norm(Theta.reshape(-1)).item()

        # Apply matrix exponential
        y = expm(complex(factor) * H_small_square) @ e1

        # Reconstruct result in original space
        result = torch.zeros_like(Theta.reshape(-1), dtype=Theta.dtype, device=Theta.device)
        for i in range(m):
            result += torch.as_tensor(y[i], dtype=Theta.dtype, device=Theta.device) * V[i]

        return result.reshape(shape)

    def _evolve_two_site(self, site: int, dt: float):
        """Evolve the two-site tensor at (site, site+1) and truncate"""
        # Merge tensors at sites i and i+1
        A = self.mps.tensors[site]
        B = self.mps.tensors[site + 1]
        Theta = torch.einsum("ijk,klm->ijlm", A, B)

        # Time evolution using Krylov-based matrix exponential (energy-conserving)
        Theta_evolved = self._expm_multiply_two_site(-1j * dt, Theta, site)

        # SVD truncation
        shape = Theta_evolved.shape
        Theta_mat = Theta_evolved.reshape(shape[0] * shape[1], shape[2] * shape[3])
        U, S, Vh, _ = robust_svd(Theta_mat)

        # Truncate to chi_max, then tighten by the discarded-weight threshold
        k = min(len(S), self.config.chi_max)
        S2 = S**2
        cumsum = torch.cumsum(S2, dim=0)
        total = cumsum[-1]
        k_trunc = torch.searchsorted(cumsum, (1 - self.config.eps_bond) * total).item() + 1
        k = min(k, k_trunc)

        # Update tensors
        self.mps.tensors[site] = (U[:, :k] * S[:k]).reshape(shape[0], shape[1], k)
        self.mps.tensors[site + 1] = Vh[:k, :].reshape(k, shape[2], shape[3])

        # Update bond dimension
        if site < len(self.mps.bond_dims):
            self.mps.bond_dims[site] = k
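
# Truncation note (illustrative): _evolve_two_site keeps the smallest rank
# whose retained singular-value weight reaches (1 - eps_bond) of the total,
# capped at chi_max. A toy spectrum with eps_bond = 0.1:
#
#     >>> import torch
#     >>> S2 = torch.tensor([4.0, 3.0, 2.0, 1.0])   # squared singular values
#     >>> cs = torch.cumsum(S2, dim=0)              # tensor([ 4.,  7.,  9., 10.])
#     >>> torch.searchsorted(cs, (1 - 0.1) * cs[-1]).item() + 1
#     3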
def run_tdvp(
    hamiltonian: MPO, initial_mps: AdaptiveMPS, config: Optional[TDVPConfig] = None
) -> Tuple[AdaptiveMPS, List[float], List[complex]]:
    """
    Convenience function to run TDVP

    Args:
        hamiltonian: MPO Hamiltonian
        initial_mps: Initial state (evolved in place)
        config: TDVP configuration

    Returns:
        final_mps: Evolved state
        times: Time points
        energies: Energy at each time point
    """
    if config is None:
        config = TDVPConfig()

    if config.order == 1:
        tdvp = TDVP1Site(hamiltonian, initial_mps, config)
    elif config.order == 2:
        tdvp = TDVP2Site(hamiltonian, initial_mps, config)
    else:
        raise ValueError(f"Order must be 1 or 2, got {config.order}")

    times, energies = tdvp.run()

    return initial_mps, times, energies
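
# Minimal driver sketch (hedged): `build_ising_mpo` and the AdaptiveMPS call
# are hypothetical stand-ins for the builders this package actually provides;
# only run_tdvp and TDVPConfig above are real.
#
# if __name__ == "__main__":
#     H = build_ising_mpo(n_sites=12, J=1.0, h=0.5)   # hypothetical builder
#     psi = AdaptiveMPS(num_qubits=12)                # constructor may differ
#     config = TDVPConfig(dt=0.01, t_final=1.0, order=2, chi_max=64)
#     psi, times, energies = run_tdvp(H, psi, config)
#     print(f"final E = {energies[-1].real:.6f}")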