123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684 |
- using Unity.Collections;
- using Unity.Collections.LowLevel.Unsafe;
- using System.Runtime.CompilerServices;
- using Unity.Mathematics;
- using static Unity.Mathematics.math;
- namespace UnityEngine.Rendering.Universal.Internal
- {
- // This structure is designed to be Burst friendly.
- // It can be copied by value.
- internal struct DeferredTiler
- {
- // Precomputed per-light data read by the tile culling loops (CullIntermediateLights() and CullFinalLights()).
- internal struct PrePunctualLight
- {
- // Light position in view space; used as the bounding-sphere centre by Clip() and IntersectionLineSphere().
- public float3 posVS;
- // Bounding-sphere radius in world units.
- public float radius;
- // Distance between the closest bound of the light and the camera. Used for sorting lights front-to-back (the sort itself is not done in this struct).
- public float minDist;
- // Projected position of the sphere centre on the screen (near plane); CullFinalLights() shifts the tile centre toward this point.
- public float2 screenPos;
- // Index into the renderingData.lightData.visibleLights native array; this is the value CullFinalLights() writes into the tile light lists.
- public ushort visLightIndex;
- }
- enum ClipResult // Outcome of testing a sphere against a single tile plane in ClipPartial().
- {
- Unknown, // This plane alone cannot decide; Clip() goes on to test the remaining planes.
- In, // The sphere is accepted as overlapping the tile.
- Out, // The sphere lies entirely on the outer side of the tested plane.
- }
- int m_TilePixelWidth; // Tile width in pixels, set by the constructor.
- int m_TilePixelHeight; // Tile height in pixels, set by the constructor.
- int m_TileXCount; // Number of tile columns; computed by PrecomputeTiles() from the render width.
- int m_TileYCount; // Number of tile rows; computed by PrecomputeTiles() from the render height.
- // Fixed per-tile header size, counted in uint elements of m_TileHeaders (4 for tiler level 0, 2 otherwise).
- // Only the finest tiler needs to store extra per-tile information (light list depth range, bitmask for 2.5D culling).
- int m_TileHeaderSize;
- // Indicative average number of lights per tile. Only used by Setup() to size m_TileData when no explicit capacity is given.
- int m_AvgLightPerTile;
- // 0, 1 or 2 (see DeferredConfig.kTilerDepth).
- int m_TilerLevel;
- // Camera frustum planes, with right/bottom extended by PrecomputeTiles() so that the frustum covers a whole number of tiles.
- FrustumPlanes m_FrustumPlanes;
- // True when the camera uses an orthographic projection.
- bool m_IsOrthographic;
- // Atomic counters are put in a NativeArray so they can be accessed/shared from jobs.
- // [0] maxLightPerTile: only valid for the finest tiler: maximum light count found in any tile. Reset every frame.
- // [1] tileDataSize: number of ushort values reserved so far in m_TileData. Reset every frame.
- // [2] tileDataCapacity: amount of tile data memory required by this tiler (depends on the number of visible lights). Externally maintained.
- [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
- NativeArray<int> m_Counters;
- // Stores all visible light indices for all tiles.
- // (Currently) contains sequential blocks of ushort values (light indices and optionally lightDepthRange), one block per tile.
- // For example, on platforms using 16x16px tiles:
- // in a finest tiler DeferredLights.m_Tilers[0] ( 16x16px tiles), each tile will use a block of 1 * 1 * 32 = 32 ushort values
- // in an intermediate tiler DeferredLights.m_Tilers[1] ( 64x64px tiles), each tile will use a block of 4 * 4 * 32 = 512 ushort values
- // in a coarsest tiler DeferredLights.m_Tilers[2] (256x256px tiles), each tile will use a block of 16 * 16 * 32 = 8192 ushort values
- [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
- NativeArray<ushort> m_TileData;
- // Stores the tile headers (fixed size per tile, see m_TileHeaderSize):
- // light list offset, light count, and optionally additional per-tile "header" values.
- [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
- NativeArray<uint> m_TileHeaders;
- // Precomputed per-tile side planes, obtained from DeferredShaderData and filled in by PrecomputeTiles().
- [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
- NativeArray<PreTile> m_PreTiles;
- // Creates a tiler for a given tile size and hierarchy level.
- // No native memory is allocated here: the tile grid is sized by PrecomputeTiles() and the buffers are created by Setup().
- public DeferredTiler(int tilePixelWidth, int tilePixelHeight, int avgLightPerTile, int tilerLevel)
- {
-     // Finest tiler (level 0) computes extra tile data stored into the header, so it requires more space. See CullFinalLights() vs CullIntermediateLights().
-     // Finest tiler: lightListOffset, lightCount, listDepthRange, listBitMask
-     // Coarse tilers: lightListOffset, lightCount
-     const int kFinestHeaderSize = 4;
-     const int kCoarseHeaderSize = 2;
-     m_TilerLevel = tilerLevel;
-     m_TileHeaderSize = (tilerLevel == 0) ? kFinestHeaderSize : kCoarseHeaderSize;
-     m_AvgLightPerTile = avgLightPerTile;
-     m_TilePixelWidth = tilePixelWidth;
-     m_TilePixelHeight = tilePixelHeight;
-     // The grid dimensions and the projection data are unknown until PrecomputeTiles() runs.
-     m_TileXCount = 0;
-     m_TileYCount = 0;
-     m_IsOrthographic = false;
-     m_FrustumPlanes = default(FrustumPlanes);
-     // All native containers start out in the "not created" state.
-     m_Counters = default(NativeArray<int>);
-     m_TileData = default(NativeArray<ushort>);
-     m_TileHeaders = default(NativeArray<uint>);
-     m_PreTiles = default(NativeArray<PreTile>);
- }
- // Hierarchy level of this tiler, as passed to the constructor.
- public int TilerLevel => m_TilerLevel;
- // Number of tile columns computed by the last PrecomputeTiles() call (0 before the first call).
- public int TileXCount => m_TileXCount;
- // Number of tile rows computed by the last PrecomputeTiles() call (0 before the first call).
- public int TileYCount => m_TileYCount;
- // Tile width in pixels, as passed to the constructor.
- public int TilePixelWidth => m_TilePixelWidth;
- // Tile height in pixels, as passed to the constructor.
- public int TilePixelHeight => m_TilePixelHeight;
- // Number of uint values stored per tile in TileHeaders (4 for tiler level 0, 2 otherwise).
- public int TileHeaderSize => m_TileHeaderSize;
- // Value of counter [0] (largest light count recorded by CullFinalLights()), or 0 while the counters are not allocated.
- public int MaxLightPerTile
- {
-     get
-     {
-         if (!m_Counters.IsCreated)
-             return 0;
-         return m_Counters[0];
-     }
- }
- // Value of counter [2] (tile data capacity, raised by AddTileData() on overflow), or 0 while the counters are not allocated.
- public int TileDataCapacity
- {
-     get
-     {
-         if (!m_Counters.IsCreated)
-             return 0;
-         return m_Counters[2];
-     }
- }
- // Tile light-list storage (m_TileData); each tile's block is located through its header, see GetTileOffsetAndCount().
- public NativeArray<ushort> Tiles => m_TileData;
- // Per-tile header storage (m_TileHeaders), TileHeaderSize uint values per tile, indexed through GetTileHeaderOffset().
- public NativeArray<uint> TileHeaders => m_TileHeaders;
- // Reads the light-list offset and light count stored in the first two header slots of tile (i, j).
- //   i, j    tile column and row.
- //   offset  receives header slot 0 (start of the tile's block in Tiles).
- //   count   receives header slot 1 (number of lights in the tile).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void GetTileOffsetAndCount(int i, int j, out int offset, out int count)
- {
-     int headerBase = GetTileHeaderOffset(i, j);
-     uint rawOffset = m_TileHeaders[headerBase];
-     uint rawCount = m_TileHeaders[headerBase + 1];
-     offset = (int)rawOffset;
-     count = (int)rawCount;
- }
- // Returns the index in TileHeaders of the first header slot of tile (i, j); tiles are laid out row by row.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public int GetTileHeaderOffset(int i, int j)
- {
-     int tileIndex = j * m_TileXCount + i;
-     return tileIndex * m_TileHeaderSize;
- }
- // Allocates the counters, tile data and tile header buffers (Allocator.Temp) and resets the counters.
- //   tileDataCapacity  number of ushort values to reserve for the light lists; when <= 0 a default of
- //                     tile count * average lights per tile is used instead.
- // The tile grid must already have been sized by PrecomputeTiles(), since the header buffer length depends on it.
- public void Setup(int tileDataCapacity)
- {
-     const Allocator kAllocator = Allocator.Temp;
-     const NativeArrayOptions kOptions = NativeArrayOptions.UninitializedMemory;
-     int tileCount = m_TileXCount * m_TileYCount;
-     if (tileDataCapacity <= 0)
-         tileDataCapacity = tileCount * m_AvgLightPerTile;
-     m_Counters = new NativeArray<int>(3, kAllocator, kOptions);
-     m_TileData = new NativeArray<ushort>(tileDataCapacity, kAllocator, kOptions);
-     m_TileHeaders = new NativeArray<uint>(tileCount * m_TileHeaderSize, kAllocator, kOptions);
-     // The counters were allocated uninitialized, so every slot is written explicitly.
-     m_Counters[0] = 0;                // maxLightPerTile
-     m_Counters[1] = 0;                // tileDataSize
-     m_Counters[2] = tileDataCapacity; // tileDataCapacity
- }
- // Releases the buffers allocated by Setup(); buffers that are not currently created are skipped.
- // m_PreTiles is not released here (it is obtained from DeferredShaderData in PrecomputeTiles()).
- public void OnCameraCleanup()
- {
-     DisposeIfCreated(ref m_TileHeaders);
-     DisposeIfCreated(ref m_TileData);
-     DisposeIfCreated(ref m_Counters);
- }
- // Disposes the given array only when it currently holds an allocation.
- static void DisposeIfCreated<T>(ref NativeArray<T> array) where T : struct
- {
-     if (array.IsCreated)
-         array.Dispose();
- }
- // Sizes the tile grid for the given render target and rebuilds the four side planes of every tile in view space.
- //   proj            projection matrix; its decomposed frustum planes are stored (after adjustment) in m_FrustumPlanes.
- //   isOrthographic  selects how the tile planes are built (through the origin for perspective, parallel to Z otherwise).
- //   renderWidth, renderHeight  render target size in pixels.
- public void PrecomputeTiles(Matrix4x4 proj, bool isOrthographic, int renderWidth, int renderHeight)
- {
-     m_TileXCount = (renderWidth + m_TilePixelWidth - 1) / m_TilePixelWidth;
-     m_TileYCount = (renderHeight + m_TilePixelHeight - 1) / m_TilePixelHeight;
-     m_PreTiles = DeferredShaderData.instance.GetPreTiles(m_TilerLevel, m_TileXCount * m_TileYCount);
-     // Tiles have a fixed pixel size, so the tile grid may extend past the render target.
-     // Scale factors from the real render size to the tile-aligned one.
-     float widthScale = Align(renderWidth, m_TilePixelWidth) / (float)renderWidth;
-     float heightScale = Align(renderHeight, m_TilePixelHeight) / (float)renderHeight;
-     // Push the right and bottom clipping planes out so the frustum matches the tile grid.
-     FrustumPlanes planes = proj.decomposeProjection;
-     planes.right = planes.left + (planes.right - planes.left) * widthScale;
-     planes.bottom = planes.top + (planes.bottom - planes.top) * heightScale;
-     m_FrustumPlanes = planes;
-     m_IsOrthographic = isOrthographic;
-     // Tile size in world units, measured on the near plane.
-     float tileWidthWS = (planes.right - planes.left) / m_TileXCount;
-     float tileHeightWS = (planes.top - planes.bottom) / m_TileYCount;
-     // Camera view space is always OpenGL RH coordinates system: the near plane sits at negative Z.
-     float nearZ = -planes.zNear;
-     // Z of a second point one unit further along the view direction (orthographic planes only).
-     float behindNearZ = nearZ - 1.0f;
-     for (int row = 0; row < m_TileYCount; ++row)
-     {
-         float tileTop = planes.top - tileHeightWS * row;
-         float tileBottom = tileTop - tileHeightWS;
-         for (int col = 0; col < m_TileXCount; ++col)
-         {
-             float tileLeft = planes.left + tileWidthWS * col;
-             float tileRight = tileLeft + tileWidthWS;
-             // The four tile corners on the near plane.
-             float3 bottomLeft = new float3(tileLeft, tileBottom, nearZ);
-             float3 topLeft = new float3(tileLeft, tileTop, nearZ);
-             float3 topRight = new float3(tileRight, tileTop, nearZ);
-             float3 bottomRight = new float3(tileRight, tileBottom, nearZ);
-             PreTile preTile;
-             if (isOrthographic)
-             {
-                 // Orthographic: each plane contains a tile edge and is parallel to the view direction.
-                 preTile.planeLeft = MakePlane(bottomLeft, new float3(tileLeft, tileBottom, behindNearZ), topLeft);
-                 preTile.planeRight = MakePlane(topRight, new float3(tileRight, tileTop, behindNearZ), bottomRight);
-                 preTile.planeBottom = MakePlane(bottomRight, new float3(tileRight, tileBottom, behindNearZ), bottomLeft);
-                 preTile.planeTop = MakePlane(topLeft, new float3(tileLeft, tileTop, behindNearZ), topRight);
-             }
-             else
-             {
-                 // In view space with perspective projection, all planes pass by (0,0,0).
-                 preTile.planeLeft = MakePlane(bottomLeft, topLeft);
-                 preTile.planeRight = MakePlane(topRight, bottomRight);
-                 preTile.planeBottom = MakePlane(bottomRight, bottomLeft);
-                 preTile.planeTop = MakePlane(topLeft, topRight);
-             }
-             m_PreTiles[col + row * m_TileXCount] = preTile;
-         }
-     }
- }
- // Culls a list of lights against a range of tiles and writes, per tile, the light list, its depth range and a 2.5D bitmask.
- // This differs from CullIntermediateLights in 3 ways:
- // - tile-frustums/light intersection use different algorithm
- // - depth range of the light shape intersecting the tile-frustums is output in the tile list header section
- // - light indices written out are indexing visible_lights, rather than the array of PrePunctualLights.
- // Parameters:
- //   punctualLights   precomputed lights, indexed by the values read from lightIndices.
- //   lightIndices     candidate lights; entries [lightStartIndex, lightStartIndex + lightCount) are tested.
- //   istart, iend     half-open range of tile columns to process.
- //   jstart, jend     half-open range of tile rows to process.
- // Header written for each tile: [0] offset of the tile's block in m_TileData, [1] light count,
- // [2] two half floats (a | b << 16) mapping a depth to a bit index, [3] bitmask of the depth slices covered by lights.
- // Counter [0] is raised atomically to the largest per-tile light count found by this call.
- unsafe public void CullFinalLights(ref NativeArray<PrePunctualLight> punctualLights,
-     ref NativeArray<ushort> lightIndices, int lightStartIndex, int lightCount,
-     int istart, int iend, int jstart, int jend)
- {
-     // Interestingly, 2-3% faster when using unsafe arrays.
-     PrePunctualLight* _punctualLights = (PrePunctualLight*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(punctualLights);
-     ushort* _lightIndices = (ushort*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(lightIndices);
-     uint* _tileHeaders = (uint*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_TileHeaders);
-     if (lightCount == 0)
-     {
-         // Nothing to cull: clear the four header slots of every tile in range.
-         for (int j = jstart; j < jend; ++j)
-             for (int i = istart; i < iend; ++i)
-             {
-                 int headerOffset = GetTileHeaderOffset(i, j);
-                 _tileHeaders[headerOffset + 0] = 0;
-                 _tileHeaders[headerOffset + 1] = 0;
-                 _tileHeaders[headerOffset + 2] = 0;
-                 _tileHeaders[headerOffset + 3] = 0;
-             }
-         return;
-     }
-     // Store culled lights in temporary buffer. Additionally store depth range of each light for a given tile too.
-     // the depth range is a 32bit mask, but packed into a 16bits value since the range of the light is continuous
-     // (only need to store first bit enabled, and count of enabled bits).
-     // Layout for one tile: [0, culledLightCount) light indices, [culledLightCount, 2 * culledLightCount) packed depth ranges.
-     ushort* tiles = stackalloc ushort[lightCount * 2];
-     float2* depthRanges = stackalloc float2[lightCount];
-     int maxLightPerTile = 0; // for stats
-     int lightEndIndex = lightStartIndex + lightCount;
-     float2 tileSize = new float2((m_FrustumPlanes.right - m_FrustumPlanes.left) / m_TileXCount, (m_FrustumPlanes.top - m_FrustumPlanes.bottom) / m_TileYCount);
-     float2 tileExtents = tileSize * 0.5f;
-     float2 tileExtentsInv = new float2(1.0f / tileExtents.x, 1.0f / tileExtents.y);
-     for (int j = jstart; j < jend; ++j)
-     {
-         float tileYCentre = m_FrustumPlanes.top - (tileExtents.y + j * tileSize.y);
-         for (int i = istart; i < iend; ++i)
-         {
-             float tileXCentre = m_FrustumPlanes.left + tileExtents.x + i * tileSize.x;
-             int culledLightCount = 0;
-             // For the current tile's light list, min&max depth range (absolute values).
-             float listMinDepth = float.MaxValue;
-             float listMaxDepth = -float.MaxValue;
-             // Duplicate the inner loop twice. Testing for the orthographic case inside the inner loop would cost an extra 8% otherwise.
-             // Missing C++ template argument here!
-             if (!m_IsOrthographic)
-             {
-                 for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
-                 {
-                     ushort lightIndex = _lightIndices[vi];
-                     PrePunctualLight ppl = _punctualLights[lightIndex];
-                     // Offset tileCentre toward the light to calculate a more conservative minMax depth bound,
-                     // but it must remains inside the tile and must not pass further than the light centre.
-                     float2 tileCentre = new float2(tileXCentre, tileYCentre);
-                     float2 dir = ppl.screenPos - tileCentre;
-                     float2 d = abs(dir * tileExtentsInv);
-                     float sInv = 1.0f / max3(d.x, d.y, 1.0f);
-                     // Perspective: the line starts at the view-space origin and goes through the offset point on the near plane.
-                     float3 tileOffCentre = new float3(tileCentre.x + dir.x * sInv, tileCentre.y + dir.y * sInv, -m_FrustumPlanes.zNear);
-                     float3 tileOrigin = new float3(0.0f);
-                     float t0, t1;
-                     // This is more expensive than Clip() but allow to compute min&max depth range for the part of the light inside the tile.
-                     if (!IntersectionLineSphere(ppl.posVS, ppl.radius, tileOrigin, tileOffCentre, out t0, out t1))
-                         continue;
-                     listMinDepth = listMinDepth < t0 ? listMinDepth : t0;
-                     listMaxDepth = listMaxDepth > t1 ? listMaxDepth : t1;
-                     depthRanges[culledLightCount] = new float2(t0, t1);
-                     // Because this always output to the finest tiles, contrary to CullLights(),
-                     // the result are indices into visibleLights, instead of indices into punctualLights.
-                     tiles[culledLightCount] = ppl.visLightIndex;
-                     ++culledLightCount;
-                 }
-             }
-             else
-             {
-                 for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
-                 {
-                     ushort lightIndex = _lightIndices[vi];
-                     PrePunctualLight ppl = _punctualLights[lightIndex];
-                     // Offset tileCentre toward the light to calculate a more conservative minMax depth bound,
-                     // but it must remains inside the tile and must not pass further than the light centre.
-                     float2 tileCentre = new float2(tileXCentre, tileYCentre);
-                     float2 dir = ppl.screenPos - tileCentre;
-                     float2 d = abs(dir * tileExtentsInv);
-                     float sInv = 1.0f / max3(d.x, d.y, 1.0f);
-                     // Orthographic: the line starts at the offset point (z = 0) and runs along -Z with a length of zNear.
-                     float3 tileOffCentre = new float3(0, 0, -m_FrustumPlanes.zNear);
-                     float3 tileOrigin = new float3(tileCentre.x + dir.x * sInv, tileCentre.y + dir.y * sInv, 0.0f);
-                     float t0, t1;
-                     // This is more expensive than Clip() but allow to compute min&max depth range for the part of the light inside the tile.
-                     if (!IntersectionLineSphere(ppl.posVS, ppl.radius, tileOrigin, tileOffCentre, out t0, out t1))
-                         continue;
-                     listMinDepth = listMinDepth < t0 ? listMinDepth : t0;
-                     listMaxDepth = listMaxDepth > t1 ? listMaxDepth : t1;
-                     depthRanges[culledLightCount] = new float2(t0, t1);
-                     // Because this always output to the finest tiles, contrary to CullLights(),
-                     // the result are indices into visibleLights, instead of indices into punctualLights.
-                     tiles[culledLightCount] = ppl.visLightIndex;
-                     ++culledLightCount;
-                 }
-             }
-             // Post-multiply by zNear to get actual world unit absolute depth values, then clamp to valid depth range.
-             listMinDepth = max2(listMinDepth * m_FrustumPlanes.zNear, m_FrustumPlanes.zNear);
-             listMaxDepth = min2(listMaxDepth * m_FrustumPlanes.zNear, m_FrustumPlanes.zFar);
-             // Calculate bitmask for 2.5D culling.
-             // NOTE(review): depthRangeInv is not guarded against listMaxDepth == listMinDepth - confirm this cannot happen for a non-empty list.
-             uint bitMask = 0;
-             float depthRangeInv = 1.0f / (listMaxDepth - listMinDepth);
-             for (int culledLightIndex = 0; culledLightIndex < culledLightCount; ++culledLightIndex)
-             {
-                 float lightMinDepth = max2(depthRanges[culledLightIndex].x * m_FrustumPlanes.zNear, m_FrustumPlanes.zNear);
-                 float lightMaxDepth = min2(depthRanges[culledLightIndex].y * m_FrustumPlanes.zNear, m_FrustumPlanes.zFar);
-                 int firstBit = (int)((lightMinDepth - listMinDepth) * 32.0f * depthRangeInv);
-                 int lastBit = (int)((lightMaxDepth - listMinDepth) * 32.0f * depthRangeInv);
-                 int bitCount = min(lastBit - firstBit + 1, 32 - firstBit);
-                 bitMask |= (uint)((0xFFFFFFFF >> (32 - bitCount)) << firstBit);
-                 // Packed depth range of this light: first bit in the low byte, bit count in the high byte.
-                 tiles[culledLightCount + culledLightIndex] = (ushort)((uint)firstBit | (uint)(bitCount << 8));
-             }
-             // As listMinDepth and listMaxDepth are used to calculate the geometry 2.5D bitmask,
-             // we can optimize the shader execution (TileDepthInfo.shader) by refactoring the calculation.
-             // int bitIndex = 32.0h * (geoDepth - listMinDepth) / (listMaxDepth - listMinDepth);
-             // Equivalent to:
-             // a = 32.0 / (listMaxDepth - listMinDepth)
-             // b = -listMinDepth * 32.0 / (listMaxDepth - listMinDepth)
-             // int bitIndex = geoDepth * a + b;
-             float a = 32.0f * depthRangeInv;
-             float b = -listMinDepth * a;
-             // AddTileData() resets tileDataSize to 0 when the tile data buffer is full; the tile then reports no light.
-             int tileDataSize = culledLightCount * 2;
-             int tileOffset = culledLightCount > 0 ? AddTileData(tiles, ref tileDataSize) : 0;
-             int headerOffset = GetTileHeaderOffset(i, j);
-             _tileHeaders[headerOffset + 0] = (uint)tileOffset;
-             _tileHeaders[headerOffset + 1] = (uint)(tileDataSize == 0 ? 0 : culledLightCount);
-             _tileHeaders[headerOffset + 2] = _f32tof16(a) | (_f32tof16(b) << 16);
-             _tileHeaders[headerOffset + 3] = bitMask;
-             maxLightPerTile = max(maxLightPerTile, culledLightCount);
-         }
-     }
-     // Atomic max on counter [0]: the counters are shared between jobs, so a plain read-modify-write
-     // could let a concurrent caller overwrite a larger value with a smaller one.
-     int* _counters = (int*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_Counters);
-     int observedMax = _counters[0];
-     while (maxLightPerTile > observedMax)
-     {
-         int previous = System.Threading.Interlocked.CompareExchange(ref _counters[0], maxLightPerTile, observedMax);
-         if (previous == observedMax)
-             break;
-         // Another caller changed the counter first: retry against the value it stored.
-         observedMax = previous;
-     }
- }
- // Culls a list of lights against a range of tiles using the precomputed tile side planes (see Clip()).
- // The indices written to the tile lists are the values read from lightIndices (indices into punctualLights).
- //   punctualLights   precomputed lights, indexed by the values read from lightIndices.
- //   lightIndices     candidate lights; entries [lightStartIndex, lightStartIndex + lightCount) are tested.
- //   istart, iend     half-open range of tile columns to process.
- //   jstart, jend     half-open range of tile rows to process.
- // Header written for each tile: [0] offset of the tile's block in m_TileData, [1] light count.
- // TODO: finer culling for spot lights
- unsafe public void CullIntermediateLights(ref NativeArray<PrePunctualLight> punctualLights,
-     ref NativeArray<ushort> lightIndices, int lightStartIndex, int lightCount,
-     int istart, int iend, int jstart, int jend)
- {
-     // Interestingly, 2-3% faster when using unsafe arrays.
-     PrePunctualLight* lights = (PrePunctualLight*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(punctualLights);
-     ushort* candidates = (ushort*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(lightIndices);
-     uint* headers = (uint*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_TileHeaders);
-     if (lightCount == 0)
-     {
-         // Nothing to cull: mark every tile in range as empty.
-         for (int row = jstart; row < jend; ++row)
-         {
-             for (int col = istart; col < iend; ++col)
-             {
-                 int emptyHeader = GetTileHeaderOffset(col, row);
-                 headers[emptyHeader + 0] = 0;
-                 headers[emptyHeader + 1] = 0;
-             }
-         }
-         return;
-     }
-     // Scratch list holding the lights that survive culling for the tile being processed.
-     ushort* survivors = stackalloc ushort[lightCount];
-     int lightEndIndex = lightStartIndex + lightCount;
-     for (int row = jstart; row < jend; ++row)
-     {
-         for (int col = istart; col < iend; ++col)
-         {
-             PreTile preTile = m_PreTiles[col + row * m_TileXCount];
-             int survivorCount = 0;
-             for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
-             {
-                 ushort lightIndex = candidates[vi];
-                 PrePunctualLight light = lights[lightIndex];
-                 // This is slightly faster than IntersectionLineSphere().
-                 if (Clip(ref preTile, light.posVS, light.radius))
-                 {
-                     survivors[survivorCount] = lightIndex;
-                     ++survivorCount;
-                 }
-             }
-             // Copy the culled light list. AddTileData() resets survivorCount to 0 when the tile data buffer is full.
-             int tileOffset = 0;
-             if (survivorCount > 0)
-                 tileOffset = AddTileData(survivors, ref survivorCount);
-             int headerOffset = GetTileHeaderOffset(col, row);
-             headers[headerOffset + 0] = (uint)tileOffset;
-             headers[headerOffset + 1] = (uint)survivorCount;
-         }
-     }
- }
- // Appends `size` ushort values read from lightData to m_TileData and returns the offset of the copied block.
- // The destination range is reserved with an atomic add on counter [1] (tileDataSize), so concurrent callers
- // receive disjoint ranges.
- //   lightData  source values to copy.
- //   size       number of ushort values to copy; set to 0 when the data could not be stored.
- // Returns the offset of the block in m_TileData, or 0 when the buffer is too small. In that case nothing is
- // copied and counter [2] (tileDataCapacity) is raised atomically to the size that would have been needed.
- // Growing the buffer in place is deliberately not attempted: it would require a critical section around the
- // reallocation, which costs extra CPU time.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- unsafe int AddTileData(ushort* lightData, ref int size)
- {
-     int* _Counters = (int*)m_Counters.GetUnsafePtr();
-     int tileDataSize = System.Threading.Interlocked.Add(ref _Counters[1], size);
-     int offset = tileDataSize - size;
-     if (tileDataSize <= m_TileData.Length)
-     {
-         ushort* _TileData = (ushort*)m_TileData.GetUnsafePtr();
-         UnsafeUtility.MemCpy(_TileData + offset, lightData, size * sizeof(ushort));
-         return offset;
-     }
-     // Buffer overflow. Ignore data to add, but record the required capacity.
-     // Atomic max on counter [2]: a plain read-modify-write could let a concurrent caller replace a larger
-     // requirement with a smaller one.
-     int observedCapacity = _Counters[2];
-     while (tileDataSize > observedCapacity)
-     {
-         int previous = System.Threading.Interlocked.CompareExchange(ref _Counters[2], tileDataSize, observedCapacity);
-         if (previous == observedCapacity)
-             break;
-         // Another caller changed the counter first: retry against the value it stored.
-         observedCapacity = previous;
-     }
-     size = 0;
-     return 0;
- }
- // Return parametric intersection between a sphere and a line.
- // The intersections points P0 and P1 are:
- // P0 = raySource + rayDirection * t0.
- // P1 = raySource + rayDirection * t1.
- // with t0 <= t1 whenever dot(rayDirection, rayDirection) is positive.
- // Returns false (and sets t0 = t1 = 0) when the line misses the sphere or only touches it.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- unsafe static bool IntersectionLineSphere(float3 centre, float radius, float3 raySource, float3 rayDirection, out float t0, out float t1)
- {
-     // Coefficients of |raySource + t * rayDirection - centre|^2 = radius^2, written as a*t^2 + 2*halfB*t + c = 0.
-     float a = dot(rayDirection, rayDirection); // always >= 0
-     float halfB = dot(raySource - centre, rayDirection);
-     float c = dot(raySource, raySource)
-         + dot(centre, centre)
-         - (radius * radius)
-         - 2 * dot(raySource, centre);
-     float discriminant = (halfB * halfB) - a * c;
-     // Written as a negated '>' so that a NaN discriminant is rejected as well.
-     if (!(discriminant > 0))
-     {
-         t0 = 0.0f; // invalid
-         t1 = 0.0f; // invalid
-         return false;
-     }
-     float root = sqrt(discriminant);
-     float invA = 1.0f / a;
-     t0 = (-halfB - root) * invA;
-     t1 = (-halfB + root) * invA;
-     return true;
- }
- // Clip a sphere against a 2D tile. Near and far planes are ignored (already tested).
- // Returns true when the sphere (posVS, radius) is considered to overlap the tile.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static bool Clip(ref PreTile tile, float3 posVS, float radius)
- {
-     // Simplified clipping code, only deals with 4 clipping planes.
-     // zNear and zFar clipping planes are ignored as presumably the light is already visible to the camera frustum.
-     int insideCount = 0;
-     float radiusSq = radius * radius;
-     // Test the planes in the order left, right, top, bottom and stop at the first conclusive answer.
-     ClipResult res = ClipPartial(tile.planeLeft, tile.planeBottom, tile.planeTop, posVS, radius, radiusSq, ref insideCount);
-     if (res == ClipResult.Unknown)
-         res = ClipPartial(tile.planeRight, tile.planeBottom, tile.planeTop, posVS, radius, radiusSq, ref insideCount);
-     if (res == ClipResult.Unknown)
-         res = ClipPartial(tile.planeTop, tile.planeLeft, tile.planeRight, posVS, radius, radiusSq, ref insideCount);
-     if (res == ClipResult.Unknown)
-         res = ClipPartial(tile.planeBottom, tile.planeLeft, tile.planeRight, posVS, radius, radiusSq, ref insideCount);
-     // No plane was conclusive: accept only when the centre is on the inner side of all four planes.
-     if (res == ClipResult.Unknown)
-         return insideCount == 4;
-     return res == ClipResult.In;
- }
- // Internal function to clip against 1 plane of a cube, with additional 2 side planes for false-positive detection (normally 4 planes, but near and far planes are ignored).
- //   plane                   plane being tested.
- //   sidePlaneA, sidePlaneB  the two neighbouring planes used to refine an intersecting case.
- //   posVS, radius, radiusSq sphere centre, radius and squared radius.
- //   insideCount             incremented when the sphere centre is on the inner side of 'plane'.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static ClipResult ClipPartial(float4 plane, float4 sidePlaneA, float4 sidePlaneB, float3 posVS, float radius, float radiusSq, ref int insideCount)
- {
-     float d = DistanceToPlane(plane, posVS);
-     // completely outside
-     if (d + radius <= 0.0f)
-         return ClipResult.Out;
-     // intersection: further check: only need to consider case where more than half the sphere is outside
-     if (d < 0.0f)
-     {
-         // Project the centre onto the plane; -limit is the squared radius of the circle the sphere cuts in it.
-         float3 projected = posVS - plane.xyz * d;
-         float limit = -(radiusSq - d * d);
-         bool reachesTile = SignedSq(DistanceToPlane(sidePlaneA, projected)) >= limit
-             && SignedSq(DistanceToPlane(sidePlaneB, projected)) >= limit;
-         return reachesTile ? ClipResult.In : ClipResult.Unknown;
-     }
-     // consider as good as completely inside
-     ++insideCount;
-     return ClipResult.Unknown;
- }
- // Builds the plane (normal.xyz, distance) containing the origin and the points pb and pc.
- // The normal is normalize(cross(pb, pc)).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float4 MakePlane(float3 pb, float3 pc)
- {
-     float3 normal = normalize(cross(pb, pc));
-     // The planes pass all by the origin, hence a zero distance term.
-     return new float4(normal, 0.0f);
- }
- // Builds the plane (normal.xyz, distance) containing the points pa, pb and pc.
- // The normal is normalize(cross(pb - pa, pc - pa)) and the distance term makes pa lie on the plane.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float4 MakePlane(float3 pa, float3 pb, float3 pc)
- {
-     float3 normal = normalize(cross(pb - pa, pc - pa));
-     return new float4(normal, -dot(normal, pa));
- }
- // Evaluates the plane equation at p: dot(plane.xyz, p) + plane.w.
- // For the planes built by MakePlane() (unit normal) this is the signed distance from p to the plane.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float DistanceToPlane(float4 plane, float3 p)
- {
-     float alongNormal = plane.x * p.x + plane.y * p.y + plane.z * p.z;
-     return alongNormal + plane.w;
- }
- // Returns f squared, keeping the sign of f (negative inputs give a negative result).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float SignedSq(float f)
- {
-     // Mathf.Sign(f) * (f * f) was measured to be slower.
-     float squared = f * f;
-     return f < 0.0f ? -squared : squared;
- }
- // Smaller of a and b. Returns b when the comparison a < b is false (including when either value is NaN).
- // Unity.Mathematics.min() function calls Single_IsNan() which significantly slow down the code (up to 20% of CullFinalLights())!
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float min2(float a, float b)
- {
-     if (a < b)
-         return a;
-     return b;
- }
- // Larger of a and b. Returns b when the comparison a > b is false (including when either value is NaN).
- // Unity.Mathematics.max() function calls Single_IsNan() which significantly slow down the code (up to 20% of CullFinalLights())!
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float max2(float a, float b)
- {
-     if (a > b)
-         return a;
-     return b;
- }
- // Largest of a, b and c, computed with plain comparisons (no NaN handling, like max2()).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static float max3(float a, float b, float c)
- {
-     float largerOfAB = a > b ? a : b;
-     return largerOfAB > c ? largerOfAB : c;
- }
- // Converts a 32-bit float to half-float bits, returned in the low 16 bits of the result.
- // This is copy-pasted from Unity.Mathematics.math.f32tof16(), but use min2() function that does not check for NaN (which would consume 10% of the execution time of CullFinalLights()).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static uint _f32tof16(float x)
- {
-     const int infinity_32 = 255 << 23;
-     const uint msk = 0x7FFFF000u;
-     uint bits = asuint(x);
-     // Exponent and the mantissa bits that survive the conversion (sign and low mantissa bits removed).
-     uint magnitude = bits & msk;
-     // Rescale so that the half-float bit pattern ends up 13 bits above its final position.
-     float rescaled = min2(asfloat(magnitude) * 1.92592994e-34f, 260042752.0f); // Clamp to signed infinity if overflowed
-     uint h = (uint)(asuint(rescaled) + 0x1000) >> 13;
-     // NaN->qNaN and Inf->Inf
-     uint nanOrInf = select(0x7c00u, 0x7e00u, (int)magnitude > infinity_32);
-     h = select(h, nanOrInf, (int)magnitude >= infinity_32);
-     // After the shift only the sign bit remains, moved to bit 15.
-     uint sign = (bits & ~msk) >> 16;
-     return h | sign;
- }
- // Rounds s up to the next multiple of alignment (values that are already a multiple are returned unchanged).
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- static int Align(int s, int alignment)
- {
-     int blockCount = (s + alignment - 1) / alignment;
-     return blockCount * alignment;
- }
- }
- }
|