using System.Collections;
/// <summary>
/// Unity component that animates a toy, grayscale diffusion-style sampler into a texture,
/// applying extra sharpening around the current mouse position (the "gaze" mask).
/// </summary>
public class DiffusionToy : MonoBehaviour
// Optional seed image; during initialization it is resampled to width x height
// and converted to a grayscale float array (_xStart).
public Texture2D seedImage; // optional: grayscale seed in Resources
// Number of schedule steps: passed to the Scheduler at initialization and used as
// the loop bound of both the forward demo and the reverse sampler.
[Range(5, 100)] public int steps = 30;
// NOTE(review): eta is not read anywhere in the code visible in this section.
[Range(0f, 1f)] public float eta = 0f; // keep 0 for deterministic DDIM here
// Maximum blend weight of the sharpened x0 estimate; multiplied per pixel by the gaze mask.
[Range(0f, 1f)] public float sharpenStrength = 0.6f; // s in the blend (1 - s*M) * x0_hat + s*M * x0_sharp
// Standard deviation of the Gaussian gaze mask, as a fraction of min(width, height).
[Range(0.01f, 0.6f)] public float gazeSigma = 0.18f; // mask size as frac of min(H,W)
// When false, the main loop exits after a single reverse-sampling pass.
public bool runContinuously = true; // if true, keeps looping
// NOTE(review): presumably gates the ForwardNoisingDemo coroutine; the check itself is not visible here — confirm.
public bool forwardDemo = false; // toggles forward noising visualization
// Current sample, one float per pixel (length width * height), row-major (index = y * width + x).
float[] _x; // current sample, grayscale, nominally in [0,1]
// Grayscale seed image clamped to [0,1]; null-checked by the forward demo, so it may be absent.
float[] _xStart; // optional x0 (seed image)
// Per-pixel Gaussian weights in [0,1] written by BuildGazeMask.
float[] _mask; // gaze mask M
// Seeded generator: the same seed reproduces the same sequence of noise draws.
_rng = new System.Random(seed);
// Noise schedule for the configured step count; the coroutines read its AlphaBar (and Timesteps) tables.
// NOTE(review): the schedule is sized from the value of steps at this moment — confirm steps is not changed afterwards.
_sched = new Scheduler(steps);
// Init texture and buffers
// RGBA32, no mip chain (4th argument false), linear color space (5th argument true).
_tex = new Texture2D(width, height, TextureFormat.RGBA32, false, true);
// Point (nearest-neighbour) filtering so individual pixels stay visible when the texture is scaled.
_tex.filterMode = FilterMode.Point;
// Show the texture on the output target when one has been assigned.
if (output != null) output.texture = _tex;
// One float per pixel for the sample and for the gaze mask.
_x = new float[width * height];
_mask = new float[width * height];
// Optional seed image → grayscale array, resampled to the working resolution.
// NOTE(review): no null check on seedImage is visible here even though the seed is described as optional — confirm a guard exists.
_xStart = ImageUtils.TextureToGrayArray(ImageUtils.Resample(seedImage, width, height));
// Keep every seed value inside [0,1].
for (int i = 0; i < _xStart.Length; i++)
_xStart[i] = Mathf.Clamp01(_xStart[i]);
// Kick off the animation coroutine.
StartCoroutine(MainLoop());
// Run the forward-noising visualization and wait until it has finished.
// NOTE(review): presumably only executed when forwardDemo is set — the condition is not visible here; confirm.
yield return StartCoroutine(ForwardNoisingDemo());
// Run one complete reverse-sampling pass and wait until it has finished.
yield return StartCoroutine(ReverseSampling());
// Stop after a single pass unless continuous mode is enabled.
if (!runContinuously) yield break;
/// <summary>
/// Coroutine that visualizes forward noising. For every schedule index it draws a fresh
/// x_t from the seed image (or flat mid-gray when no seed is loaded) plus Gaussian noise,
/// displays it, and then waits one frame.
/// </summary>
/// <returns>An enumerator suitable for <c>StartCoroutine</c>; yields once per schedule step.</returns>
IEnumerator ForwardNoisingDemo()
{
    // Begin from x0. _xStart is optional, so fall back to mid-gray instead of
    // letting Array.Copy throw on a null source.
    if (_xStart != null)
    {
        System.Array.Copy(_xStart, _x, _x.Length);
    }
    else
    {
        FillWithGray(_x, 0.5f);
    }

    // NOTE(review): _sched was built from the value of steps at initialization;
    // confirm steps cannot change afterwards, otherwise AlphaBar[ti] may be out of range.
    for (int ti = 0; ti < steps; ti++)
    {
        // Per-step constants, computed once rather than once per pixel.
        float abar = _sched.AlphaBar[ti];
        float signalScale = Mathf.Sqrt(abar);
        float sigma = Mathf.Sqrt(1f - abar);

        // show x_t from x0 + noise
        for (int i = 0; i < _x.Length; i++)
        {
            float eps = NextFloatNormal(_rng);
            float x0 = (_xStart != null) ? _xStart[i] : 0.5f;

            // Noise is halved and offset by 0.5 so that pure noise is centered on mid-gray.
            // NOTE(review): the +0.5 offset is also applied to the signal term, so low-noise
            // steps display x0 brightened by 0.5 — confirm this is intended.
            _x[i] = signalScale * x0 + sigma * eps * 0.5f + 0.5f;
        }

        // Values may fall outside [0,1]; BlitToTexture clamps them for display.
        BlitToTexture(_x);

        // One schedule step per rendered frame.
        yield return null;
    }
}
/// <summary>
/// Coroutine that runs one deterministic DDIM-style reverse pass, from pure noise down to
/// schedule index 0, showing one step per frame. There is no learned denoiser: the x0
/// estimate is a crude rescale of x_t that is then sharpened inside the mouse-driven gaze mask.
/// </summary>
/// <returns>An enumerator suitable for <c>StartCoroutine</c>; yields once per schedule step.</returns>
IEnumerator ReverseSampling()
{
    int pixelCount = _x.Length;

    // Start from pure noise x_T
    for (int i = 0; i < pixelCount; i++)
    {
        _x[i] = Mathf.Clamp01(0.5f + 0.25f * NextFloatNormal(_rng));
    }

    // Scratch buffers are allocated once and fully overwritten on every step,
    // instead of allocating several full-frame arrays per frame.
    float[] x0_hat = new float[pixelCount];
    float[] x0_tilde = new float[pixelCount];

    // NOTE(review): _sched was built from the value of steps at initialization;
    // confirm steps cannot change afterwards, otherwise AlphaBar[ti] may be out of range.
    for (int ti = steps - 1; ti >= 0; ti--)
    {
        // 1) Build gaze mask M_t from the current mouse position.
        BuildGazeMask(_mask, gazeSigma);

        // 2) We need an epsilon estimate. In a real model this comes from a UNet.
        //    Here eps is backed out from the current x_t and a "desired" x0
        //    (guided by sharpening inside the gaze mask).
        float abar_t = _sched.AlphaBar[ti];
        float sqrtAbar_t = Mathf.Sqrt(abar_t);
        float sqrtOneMinusAbar_t = Mathf.Sqrt(1f - abar_t);

        // Raw x0 estimate assuming eps ≈ 0 (a crude prior):
        //   x0_hat = (x_t - sqrt(1 - abar_t) * eps_hat) / sqrt(abar_t)  →  x_t / sqrt(abar_t)
        // Clamped to [0,1] to keep things stable; the epsilon avoids division by zero.
        for (int i = 0; i < pixelCount; i++)
        {
            x0_hat[i] = Mathf.Clamp01(_x[i] / (sqrtAbar_t + 1e-6f));
        }

        // 3) Sharpen inside the gaze mask to "want" detail where you look.
        float[] x0_sharp = ImageUtils.UnsharpMask(x0_hat, width, height, radius: 1, amount: 1.0f);
        for (int i = 0; i < pixelCount; i++)
        {
            float s = sharpenStrength * _mask[i]; // spatial strength
            x0_tilde[i] = Mathf.Clamp01((1f - s) * x0_hat[i] + s * x0_sharp[i]);
        }

        // At the last step (ti == 0) the target noise level is zero, so x_prev collapses to x0_tilde.
        float abar_prev = (ti > 0) ? _sched.AlphaBar[ti - 1] : 1f;
        float sqrtAbarPrev = Mathf.Sqrt(abar_prev);
        float sqrtOneMinusAbarPrev = Mathf.Sqrt(1f - abar_prev);

        // 4) Recompute eps so it is consistent with the guided x0_tilde, then
        // 5) take the deterministic DDIM step.
        //    From x_t = sqrt(abar_t) * x0 + sqrt(1 - abar_t) * eps it follows that
        //      eps    = (x_t - sqrt(abar_t) * x0) / sqrt(1 - abar_t)
        //      x_prev = sqrt(abar_prev) * x0 + sqrt(1 - abar_prev) * eps
        //    Both depend only on pixel i, so _x can be updated in place.
        for (int i = 0; i < pixelCount; i++)
        {
            float x0 = x0_tilde[i];
            float eps = (_x[i] - sqrtAbar_t * x0) / (sqrtOneMinusAbar_t + 1e-6f);
            _x[i] = Mathf.Clamp01(sqrtAbarPrev * x0 + sqrtOneMinusAbarPrev * eps);
        }

        BlitToTexture(_x);

        // Display one schedule step per rendered frame.
        yield return null;
    }
}
/// <summary>
/// Fills <paramref name="mask"/> with an isotropic Gaussian centered on the pixel that
/// corresponds to the current mouse position. The peak value is 1 at the center pixel.
/// </summary>
/// <param name="mask">Destination buffer, row-major, with at least width * height elements.</param>
/// <param name="sigmaFrac">Gaussian standard deviation as a fraction of min(width, height).</param>
void BuildGazeMask(float[] mask, float sigmaFrac)
{
    // Mouse → normalized screen coordinates in [0,1]; Max(1, …) guards a zero-sized screen.
    // NOTE(review): mapping screen fractions straight to texture pixels assumes the
    // texture is displayed across the whole screen — confirm.
    Vector3 m = Input.mousePosition;
    float u = Mathf.Clamp01(m.x / Mathf.Max(1, Screen.width));
    float v = Mathf.Clamp01(m.y / Mathf.Max(1, Screen.height));

    // Center of the Gaussian in pixel coordinates.
    int cx = Mathf.RoundToInt(u * (width - 1));
    int cy = Mathf.RoundToInt(v * (height - 1));

    float sigma = sigmaFrac * Mathf.Min(width, height);
    // Floor the denominator so a zero sigma yields a one-pixel mask instead of 0/0 = NaN at the center.
    float twoSigma2 = Mathf.Max(2f * sigma * sigma, 1e-6f);

    for (int y = 0; y < height; y++)
    {
        float dy = y - cy;
        for (int x = 0; x < width; x++)
        {
            float dx = x - cx;
            float g = Mathf.Exp(-(dx * dx + dy * dy) / twoSigma2);
            mask[y * width + x] = g; // 0..1
        }
    }
    // No normalization needed: the unscaled Gaussian already equals 1 at (cx, cy).
}
/// <summary>
/// Converts a grayscale float buffer to opaque 8-bit gray pixels and uploads them to the
/// display texture.
/// </summary>
/// <param name="gray">
/// Grayscale values, one per texture pixel; values outside [0,1] are clamped.
/// The length must match the texture's pixel count.
/// </param>
void BlitToTexture(float[] gray)
{
    var cols = new Color32[gray.Length];
    for (int i = 0; i < gray.Length; i++)
    {
        // Clamp first so the cast to byte cannot wrap around.
        byte b = (byte)Mathf.RoundToInt(Mathf.Clamp01(gray[i]) * 255f);
        cols[i] = new Color32(b, b, b, 255);
    }

    // Without these two calls the converted pixels never reach the texture.
    _tex.SetPixels32(cols);
    // The texture is created without a mip chain, so skip mipmap regeneration.
    _tex.Apply(false);
}
/// <summary>Overwrites every element of <paramref name="arr"/> with <paramref name="val"/>.</summary>
/// <param name="arr">Buffer to fill in place.</param>
/// <param name="val">Value written to each element.</param>
void FillWithGray(float[] arr, float val)
{
    int index = 0;
    while (index < arr.Length)
    {
        arr[index] = val;
        index++;
    }
}
/// <summary>
/// Draws one standard-normal sample using the Box–Muller transform (sine branch).
/// </summary>
/// <param name="rng">Random source; exactly two <c>NextDouble()</c> values are consumed per call.</param>
/// <returns>A normally distributed value, narrowed to <c>float</c>.</returns>
static float NextFloatNormal(System.Random rng)
{
    // NextDouble() lies in [0, 1), so 1 - NextDouble() lies in (0, 1] and Log never receives 0.
    double radiusUniform = 1.0 - rng.NextDouble();
    double angleUniform = 1.0 - rng.NextDouble();

    double radius = System.Math.Sqrt(-2.0 * System.Math.Log(radiusUniform));
    double angle = 2.0 * System.Math.PI * angleUniform;

    return (float)(radius * System.Math.Sin(angle));
}