// Required packages:
// - com.unity.audio.dspgraph (DSPGraph Audio Framework)
// - Burst, Collections, Mathematics
// What it does:
// - Hooks Unity's default audio output using DSPGraph's IAudioOutput.
// - On the audio mixer thread, it reads the final mixed output buffer,
//   accumulates it into an FFT window, runs a radix-2 FFT (Burst-friendly),
//   and publishes a magnitude spectrum plus a sample-accurate DSPClock timestamp.
// - On the main thread, you can pull the latest spectrum without allocations.
// Why this is "minimum theoretical latency":
// - Analysis runs in EndMix() on the mixer thread, per audio buffer.
// - The earliest you can know about audio is one DSP buffer "late":
// latency ≈ dspBufferSize / sampleRate seconds (plus ADC/driver I/O if mic).
using Unity.Collections.LowLevel.Unsafe;
// Component that holds the inspector settings for the FFT analysis of the audio output
// (mono mix-down, Hann windowing, dB conversion) and exposes the latest published spectrum.
public sealed class UltraLowLatencyFftAnalyzer : MonoBehaviour
// NOTE(review): the field this tooltip annotates is not visible in this excerpt. The output-job
// setup reads a member named `fftSize`, so it is presumably `public int fftSize` — confirm.
// The job rounds the requested size up with NextPow2 rather than rejecting non-powers-of-two.
[Tooltip("Must be a power of two. If set to 0, uses the current DSP buffer size (recommended for minimum latency).")]
// Copied into the job as MixToMono (1/0) when the job is configured.
[Tooltip("If true, mixes all output channels down to mono before FFT.")]
public bool mixToMono = true;
// Copied into the job as UseHannWindow (1/0) when the job is configured.
[Tooltip("Applies a Hann window before FFT (recommended).")]
public bool useHannWindow = true;
// Copied into the job as OutputDb (1/0) when the job is configured.
[Tooltip("If true, magnitude is converted to dBFS (log). Otherwise linear magnitude.")]
public bool outputDb = false;
// Read-only views over the component's cached audio configuration.
// NOTE(review): no assignment to this component's _fftSize, _sampleRate or _dspBufferSize is
// visible in this excerpt (the job struct keeps its own copies) — confirm they are populated.
public int CurrentFftSize => _fftSize;
public int SampleRate => _sampleRate;
public int DspBufferSize => _dspBufferSize;
// Latest published timestamp in samples (DSPClock at publish time).
// Uses Interlocked.Read so the 64-bit value is read atomically while the publisher
// updates it with Interlocked.Exchange.
public long LatestDspClockSamples => Interlocked.Read(ref _latestClockSamples);
// Estimated lower bound on analysis latency (seconds): one DSP buffer, i.e.
// dspBufferSize / sampleRate, computed only when both values are positive.
// NOTE(review): the fallback arm of this conditional expression is not visible in this excerpt.
public double TheoreticalMinLatencySeconds => (_sampleRate > 0 && _dspBufferSize > 0)
? (double)_dspBufferSize / _sampleRate
// --- Internal state ---
// Handle returned by AttachToDefaultOutput; its IsValid flag is checked during teardown.
private AudioOutputHandle _outputHandle;
private FftOutputJob _job; // struct copied into output system
// GCHandle taken on this component; its address is handed to the job as OwnerPtr.
private GCHandle _thisHandle;
// Backs the DspBufferSize property.
private int _dspBufferSize;
// Timestamp of the most recent publish; written with Interlocked.Exchange and
// read with Interlocked.Read.
private long _latestClockSamples;
// Double-buffered spectrum storage (main thread owns these arrays)
// NOTE(review): PublishSpectrum, documented as running on the mixer thread, both allocates
// and writes these arrays; "owns" presumably refers to disposal only — confirm.
private NativeArray<float> _spectrumA;
private NativeArray<float> _spectrumB;
// Which buffer readers should copy from; stays -1 until the first publish.
private volatile int _publishedIndex = -1; // 0 => A, 1 => B
// We expose a "pull" that copies into your buffer (no allocations).
// Returns false if no spectrum has been published yet.
// destination must have exactly as many elements as the published spectrum; otherwise an
// ArgumentException is thrown. dspClockSamples receives LatestDspClockSamples, which is
// sampled before the buffer index is read.
// NOTE(review): the timestamp and the buffer index are read separately, so a publish that
// lands between the two reads could pair a timestamp with a different spectrum — confirm
// this is acceptable for callers.
// NOTE(review): the success return is not visible in this excerpt.
public bool TryGetLatestSpectrum(NativeArray<float> destination, out long dspClockSamples)
dspClockSamples = LatestDspClockSamples;
// Snapshot the volatile index once so the rest of the method uses one buffer choice.
int idx = _publishedIndex;
if (idx < 0) return false;
var src = (idx == 0) ? _spectrumA : _spectrumB;
// Also throws when the source buffer was never created, although the message only
// mentions the length mismatch.
if (!src.IsCreated || destination.Length != src.Length)
throw new ArgumentException("Destination length must equal FFT bin count (fftSize/2).");
NativeArray<float>.Copy(src, destination);
// Convenience overload for float[] (alloc-free if you reuse the array).
// Follows the same steps as the NativeArray overload: returns false before the first
// publish and throws ArgumentException when the lengths differ.
// NOTE(review): destination is dereferenced without a null check, so a null array would
// surface as a NullReferenceException rather than an argument exception — confirm intent.
// NOTE(review): the copy into destination and the success return are not visible in this
// excerpt.
public bool TryGetLatestSpectrum(float[] destination, out long dspClockSamples)
dspClockSamples = LatestDspClockSamples;
// Snapshot the volatile index once so the rest of the method uses one buffer choice.
int idx = _publishedIndex;
if (idx < 0) return false;
var src = (idx == 0) ? _spectrumA : _spectrumB;
if (!src.IsCreated || destination.Length != src.Length)
throw new ArgumentException("Destination length must equal FFT bin count (fftSize/2).");
// Pin 'this' so the job can call back into a stable pointer for publishing.
// NOTE(review): per the .NET documentation, GCHandle.Alloc with GCHandleType.Pinned throws
// ArgumentException for instances with non-primitive (non-blittable) members — confirm this
// succeeds for a MonoBehaviour. GCHandleType.Normal together with GCHandle.ToIntPtr /
// GCHandle.FromIntPtr is the usual way to pass a managed reference through an IntPtr.
_thisHandle = GCHandle.Alloc(this, GCHandleType.Pinned);
// Create the output job and attach to default output.
// IAudioOutput requires Initialize/BeginMix/EndMix/Dispose.
// NOTE(review): the opening of the _job object initializer is not visible in this excerpt;
// the assignments below copy the owner address and the inspector settings into the job,
// converting each bool to 1/0.
OwnerPtr = (IntPtr)_thisHandle.AddrOfPinnedObject(),
RequestedFftSize = fftSize,
MixToMono = mixToMono ? 1 : 0,
UseHannWindow = useHannWindow ? 1 : 0,
OutputDb = outputDb ? 1 : 0,
// Attach to Unity's default output and keep the handle for teardown.
_outputHandle = _job.AttachToDefaultOutput();
// Dispose the output hook (this calls job.Dispose on the mixer thread).
// NOTE(review): the statements guarded by the next two checks are not visible in this
// excerpt (presumably disposing _outputHandle and freeing _thisHandle) — confirm against
// the full file.
if (_outputHandle.IsValid)
if (_thisHandle.IsAllocated)
// Release the spectrum buffers only if they were ever allocated.
if (_spectrumA.IsCreated) _spectrumA.Dispose();
if (_spectrumB.IsCreated) _spectrumB.Dispose();
// Called by the audio job (mixer thread) to publish results into the double buffer.
// IMPORTANT: Keep this tiny and allocation-free. No Unity API calls.
// mags points at binCount magnitudes owned by the job; dspClockSamples is the timestamp
// stored alongside them.
// NOTE(review): the first call allocates both spectrum buffers below, which is at odds with
// the "allocation-free" requirement above — confirm or pre-allocate elsewhere.
private unsafe void PublishSpectrum(float* mags, int binCount, long dspClockSamples)
// Lazily allocate spectrum buffers on the main object (still safe; uses unmanaged alloc).
// First publish will happen on mixer thread; allocations are NativeArray unmanaged, but
// you should still aim to have them created early. If you want zero risk, pre-create
// in Initialize via a custom bootstrap — this keeps the script "single file".
// NOTE(review): both allocations are presumably guarded by this single check; the block
// delimiters are not visible in this excerpt.
if (!_spectrumA.IsCreated)
_spectrumA = new NativeArray<float>(binCount, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);
_spectrumB = new NativeArray<float>(binCount, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);
// Write into the non-published buffer, then atomically flip.
// Before the first publish (_publishedIndex == -1) this selects buffer 0.
int next = (_publishedIndex == 0) ? 1 : 0;
var dst = (next == 0) ? _spectrumA : _spectrumB;
// Copy from job memory into published buffer
// NOTE(review): the copy from mags into dst is not visible in this excerpt.
// NOTE(review): nothing visible here stops a reader that is still copying a buffer from
// overlapping with the publish after next, which writes into that same buffer — confirm.
// Timestamp is stored before the index flip so a reader that sees the new index can
// also see the new timestamp.
Interlocked.Exchange(ref _latestClockSamples, dspClockSamples);
Volatile.Write(ref _publishedIndex, next);
// Mixer-thread job: hooks output, runs FFT per buffer.
// NOTE(review): several members used by this job's methods (OwnerPtr, _graph, _channels,
// _sampleRate, _fftSize, _binCount) are not declared in this excerpt.
private struct FftOutputJob : IAudioOutput
// Configuration copied from the component's inspector settings when the job is created.
public int RequestedFftSize;
public int MixToMono; // 1/0
public int UseHannWindow; // 1/0
public int OutputDb; // 1/0
// Frames per DSP buffer, narrowed from the long passed to Initialize.
private int _dspBufferSize;
// Accumulator for time-domain window
private NativeArray<float> _time; // mono window (fftSize)
private int _timeWrite; // ring write head
// Complex FFT buffer (in-place)
private NativeArray<float2> _freq; // length fftSize
// Precomputed window coefficients, multiplied into the samples when UseHannWindow is set.
private NativeArray<float> _hann; // length fftSize
// Magnitude output (binCount)
private NativeArray<float> _mags; // job-local mags for publish
// Twiddle factors and bit-reversal indices
private NativeArray<float2> _twiddles; // length fftSize/2
private NativeArray<int> _bitRev; // length fftSize
// Prepares the job for the given output configuration: records the format, creates the
// DSPGraph, chooses the FFT size, and allocates and precomputes every buffer and lookup
// table the per-buffer FFT needs.
//   channelCount  - number of interleaved output channels.
//   format        - output sound format, forwarded to DSPGraph.Create.
//   sampleRate    - output sample rate, stored and forwarded to DSPGraph.Create.
//   dspBufferSize - frames per DSP buffer; narrowed to int.
public void Initialize(int channelCount, SoundFormat format, int sampleRate, long dspBufferSize)
{
    _channels = channelCount;
    _sampleRate = sampleRate;
    _dspBufferSize = (int)dspBufferSize;

    // Create a DSPGraph that matches the output device.
    // DSPGraph.Create(outputFormat, outputChannels, dspBufferSize, sampleRate).
    _graph = DSPGraph.Create(format, channelCount, _dspBufferSize, _sampleRate);

    // - If RequestedFftSize == 0: use DSP buffer size (best latency).
    // - Else: round up to a power of two >= dspBufferSize (to avoid repeated partial windows).
    _fftSize = RequestedFftSize > 0 ? NextPow2(math.max(RequestedFftSize, _dspBufferSize)) : NextPow2(_dspBufferSize);
    _binCount = _fftSize / 2;

    // The time window starts zeroed so the first FFTs see silence rather than garbage;
    // the other buffers are fully overwritten before they are read.
    _time = new NativeArray<float>(_fftSize, Allocator.Persistent, NativeArrayOptions.ClearMemory);
    _freq = new NativeArray<float2>(_fftSize, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);
    _mags = new NativeArray<float>(_binCount, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);

    // Hann: 0.5 - 0.5*cos(2*pi*n/(N-1)).
    // The denominator is clamped to 1 so a degenerate 1-sample window cannot divide by zero.
    _hann = new NativeArray<float>(_fftSize, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);
    float hannDenominator = math.max(_fftSize - 1, 1);
    for (int i = 0; i < _fftSize; i++)
    {
        _hann[i] = 0.5f - 0.5f * math.cos((2f * math.PI * i) / hannDenominator);
    }

    // Precompute FFT helpers: twiddles e^{-j 2pi k / N} for k in [0, N/2).
    _twiddles = new NativeArray<float2>(_fftSize / 2, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);
    for (int k = 0; k < _twiddles.Length; k++)
    {
        float phase = -2f * math.PI * k / _fftSize;
        _twiddles[k] = new float2(math.cos(phase), math.sin(phase));
    }

    // _fftSize is a power of two, so its trailing-zero count is exactly log2(_fftSize).
    // This avoids truncating a floating-point logarithm to obtain the bit count.
    _bitRev = new NativeArray<int>(_fftSize, Allocator.Persistent, NativeArrayOptions.UninitializedMemory);
    int bits = math.tzcnt(_fftSize);
    for (int i = 0; i < _fftSize; i++)
    {
        _bitRev[i] = BitReverse(i, bits);
    }
}
// Starts mixing the requested number of frames by forwarding the call to the graph.
public void BeginMix(int frameCount) => _graph.BeginMix(frameCount);
// Completes one output buffer: pulls the mixed audio from the graph into 'output', appends it
// to the ring-buffer window, runs the FFT over the whole window, converts the result to
// magnitudes, and publishes them to the owning component together with the graph's DSPClock.
// NOTE(review): several statements of this method are not visible in this excerpt (the
// declarations of `sum` and `z`, the store into _mags, the branch that selects dB output).
public void EndMix(NativeArray<float> output, int frames)
// Read mixed audio into 'output' (Unity passes this buffer in).
// We fill it from the graph mix.
_graph.ReadMix(output, frames, _channels);
// Push samples into the mono window accumulator.
// frames == dsp buffer frames (typically).
// NOTE(review): totalSamples is not used by any statement visible in this excerpt.
int totalSamples = frames * _channels;
// Mono mix-down path: average all channels of each interleaved frame.
if (MixToMono != 0 && _channels > 1)
for (int f = 0; f < frames; f++)
int baseIdx = f * _channels;
for (int c = 0; c < _channels; c++)
sum += output[baseIdx + c];
float mono = sum / _channels;
_time[_timeWrite] = mono;
// Masking with (_fftSize - 1) wraps the write head; this equals a modulo only when
// _fftSize is a power of two.
_timeWrite = (_timeWrite + 1) & (_fftSize - 1);
// Otherwise take only the first channel of each frame.
// NOTE(review): no store of `s` into _time is visible in this excerpt — confirm against
// the full file.
for (int f = 0; f < frames; f++)
float s = output[f * _channels];
_timeWrite = (_timeWrite + 1) & (_fftSize - 1);
// For minimum latency, run FFT once per output buffer.
// We build a contiguous window ending at the newest sample.
int start = _timeWrite; // oldest sample index in ring
// Unroll the ring into _freq in chronological order, applying the window coefficient
// (or 1 when windowing is disabled) and a zero imaginary part.
for (int i = 0; i < _fftSize; i++)
int idx = (start + i) & (_fftSize - 1);
float w = (UseHannWindow != 0) ? _hann[i] : 1f;
_freq[i] = new float2(_time[idx] * w, 0f);
// FFT in-place (radix-2, iterative, Burst-friendly)
Radix2Fft.InPlace(_freq, _bitRev, _twiddles);
// Magnitude spectrum (bins 0..N/2-1), normalized by N
float invN = 1f / _fftSize;
for (int k = 0; k < _binCount; k++)
float mag = math.sqrt(z.x * z.x + z.y * z.y) * invN;
// dBFS-ish: 20*log10(mag), protect against log(0)
// The 1e-12 floor bounds the result at -240 dB.
mag = 20f * math.log10(math.max(mag, 1e-12f));
// Publish to the MonoBehaviour (main object) with sample-accurate timestamp.
// DSPClock is "number of samples processed since the DSPGraph was created".
long dspClockSamples = _graph.DSPClock;
// NOTE(review): UltraLowLatencyFftAnalyzer is a class, and C# rejects pointers to managed
// types (compiler error CS0208) — confirm how this cast compiles. Recovering the object
// through GCHandle.FromIntPtr(OwnerPtr).Target is the conventional alternative.
var owner = (UltraLowLatencyFftAnalyzer*)OwnerPtr;
owner->PublishSpectrum((float*)_mags.GetUnsafeReadOnlyPtr(), _binCount, dspClockSamples);
// Releases every persistent NativeArray allocated by Initialize. Each IsCreated check skips
// buffers that were never allocated or were already disposed.
// NOTE(review): the enclosing method header is not visible in this excerpt, and no disposal
// of _graph is visible either — confirm the graph is released.
if (_time.IsCreated) _time.Dispose();
if (_freq.IsCreated) _freq.Dispose();
if (_mags.IsCreated) _mags.Dispose();
if (_hann.IsCreated) _hann.Dispose();
if (_twiddles.IsCreated) _twiddles.Dispose();
if (_bitRev.IsCreated) _bitRev.Dispose();
// Used by Initialize to derive the FFT size from the requested size / DSP buffer size.
// NOTE(review): the body is not visible in this excerpt; presumably rounds v up to the next
// power of two — confirm.
private static int NextPow2(int v)
// Used by Initialize to fill the bit-reversal table, one call per index with the FFT's
// bit count. Iterates once per bit.
// NOTE(review): the loop body and the return statement are not visible in this excerpt.
private static int BitReverse(int x, int bits)
for (int i = 0; i < bits; i++)
// Holds the FFT routine called by the output job once per buffer.
private static class Radix2Fft
// In-place iterative radix-2 FFT with:
// - explicit bit-reversal permutation
// - precomputed twiddles e^{-j 2pi k / N}
// data is transformed in place; bitRev and twiddles are the lookup tables built by the job's
// Initialize method.
// NOTE(review): the declarations of n, half, u, w and t, and the bodies of the permutation
// and butterfly steps, are not fully visible in this excerpt.
public static void InPlace(NativeArray<float2> data, NativeArray<int> bitRev, NativeArray<float2> twiddles)
// Bit-reversal permute into a scratchless in-place swap
for (int i = 0; i < n; i++)
// Cooley-Tukey iterative: len is the butterfly group size and doubles each stage.
for (int len = 2; len <= n; len <<= 1)
int step = n / len; // twiddle step
// i walks the start of each len-sized group; j walks the butterflies inside it.
for (int i = 0; i < n; i += len)
for (int j = 0; j < half; j++)
float2 v = data[i + j + half];
// v * w (complex multiply)
// Second butterfly output; presumably u is data[i + j] and t is the product v * w — confirm.
data[i + j + half] = u - t;