DeferredTiler.cs 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. using Unity.Collections;
  2. using Unity.Collections.LowLevel.Unsafe;
  3. using System.Runtime.CompilerServices;
  4. using Unity.Mathematics;
  5. using static Unity.Mathematics.math;
  6. namespace UnityEngine.Rendering.Universal.Internal
  7. {
  8. // This structure is designed to be Burst friendly.
  9. // It can be copied by value.
  10. internal struct DeferredTiler
  11. {
  // Precomputed per-light data consumed by the tile culling methods.
  // Field order is kept as declared because the culling code reads this struct through raw pointers.
  internal struct PrePunctualLight
  {
      // Position of the light in view space.
      public float3 posVS;

      // Radius of the light, in world units.
      public float radius;

      // Distance between the camera and the closest bound of the light.
      // Used to sort lights front-to-back.
      public float minDist;

      // Position of the sphere centre projected on the screen (near plane).
      public float2 screenPos;

      // Index into the renderingData.lightData.visibleLights native array.
      public ushort visLightIndex;
  }
  // Outcome of testing a light sphere against one tile plane (see ClipPartial()).
  enum ClipResult
  {
      // Not decided by this plane; the remaining planes still have to be tested.
      Unknown = 0,
      // The sphere is accepted for the tile.
      In = 1,
      // The sphere lies completely outside the tested plane.
      Out = 2,
  }
  // Size of one tile, in pixels.
  int m_TilePixelWidth;
  int m_TilePixelHeight;

  // Number of tiles along X and Y. Zero until PrecomputeTiles() has run.
  int m_TileXCount;
  int m_TileYCount;

  // Fixed size, in uint, of one tile entry in m_TileHeaders.
  // Only the finest tiler stores extra per-tile information (light list depth range, bitmask for 2.5D culling).
  int m_TileHeaderSize;

  // Indicative average number of lights per tile. Only used to size m_TileData when no capacity is supplied to Setup().
  int m_AvgLightPerTile;

  // 0, 1 or 2 (see DeferredConfig.kTilerDepth).
  int m_TilerLevel;

  // Camera frustum planes, adjusted to account for the tile size.
  FrustumPlanes m_FrustumPlanes;

  // True when the projection is orthographic.
  bool m_IsOrthographic;

  // Counters live in a NativeArray so they can be accessed/shared from jobs.
  // [0] maxLightPerTile: only valid for the finest tiler: max light count per tile. Reset every frame.
  // [1] tileDataSize: reset every frame.
  // [2] tileDataCapacity: amount of memory required by each tiler (depends on the number of visible lights). Externally maintained.
  [NativeDisableContainerSafetyRestriction]
  NativeArray<int> m_Counters;

  // Visible light indices for all tiles.
  // (currently) Holds sequential blocks of ushort values (light indices and optionally lightDepthRange), one block per tile.
  // For example, on platforms using 16x16px tiles:
  // in the finest tiler DeferredLights.m_Tilers[0] ( 16x16px tiles), each tile uses a block of 1 * 1 * 32 = 32 ushort values
  // in the intermediate tiler DeferredLights.m_Tilers[1] ( 64x64px tiles), each tile uses a block of 4 * 4 * 32 = 512 ushort values
  // in the coarsest tiler DeferredLights.m_Tilers[2] (256x256px tiles), each tile uses a block of 16 * 16 * 32 = 8192 ushort values
  [NativeDisableContainerSafetyRestriction]
  NativeArray<ushort> m_TileData;

  // Tile headers (fixed size per tile):
  // light offset, light count, optionally additional per-tile "header" values.
  [NativeDisableContainerSafetyRestriction]
  NativeArray<uint> m_TileHeaders;

  // Precomputed per-tile data (tile planes), filled by PrecomputeTiles().
  [NativeDisableContainerSafetyRestriction]
  NativeArray<PreTile> m_PreTiles;
  // Creates a tiler for the given tile size and hierarchy level.
  // No native memory is allocated here: tile counts stay at 0 and all native arrays stay
  // un-created until PrecomputeTiles() and Setup() are called.
  public DeferredTiler(int tilePixelWidth, int tilePixelHeight, int avgLightPerTile, int tilerLevel)
  {
      m_TilerLevel = tilerLevel;
      m_AvgLightPerTile = avgLightPerTile;
      m_TilePixelWidth = tilePixelWidth;
      m_TilePixelHeight = tilePixelHeight;

      // The finest tiler (level 0) stores extra tile data in the header, so it needs more space.
      // See CullFinalLights() vs CullIntermediateLights().
      // Finest tiler: lightListOffset, lightCount, listDepthRange, listBitMask
      // Coarse tilers: lightListOffset, lightCount
      if (tilerLevel == 0)
          m_TileHeaderSize = 4;
      else
          m_TileHeaderSize = 2;

      m_TileXCount = 0;
      m_TileYCount = 0;
      m_IsOrthographic = false;
      m_FrustumPlanes = new FrustumPlanes { left = 0, right = 0, bottom = 0, top = 0, zNear = 0, zFar = 0 };

      // Default (not created) native arrays.
      m_Counters = default(NativeArray<int>);
      m_TileData = default(NativeArray<ushort>);
      m_TileHeaders = default(NativeArray<uint>);
      m_PreTiles = default(NativeArray<PreTile>);
  }
  // Level of this tiler, as passed to the constructor.
  public int TilerLevel => m_TilerLevel;
  // Number of tiles along X, as computed by PrecomputeTiles().
  public int TileXCount => m_TileXCount;
  // Number of tiles along Y, as computed by PrecomputeTiles().
  public int TileYCount => m_TileYCount;
  // Width of one tile, in pixels.
  public int TilePixelWidth => m_TilePixelWidth;
  // Height of one tile, in pixels.
  public int TilePixelHeight => m_TilePixelHeight;
  // Number of uint values stored per tile in the tile headers (4 for tiler level 0, otherwise 2).
  public int TileHeaderSize => m_TileHeaderSize;
  // Value of counter [0] (max light count per tile), or 0 while the counters are not allocated.
  public int MaxLightPerTile
  {
      get
      {
          if (!m_Counters.IsCreated)
              return 0;
          return m_Counters[0];
      }
  }
  // Value of counter [2] (tile data capacity), or 0 while the counters are not allocated.
  public int TileDataCapacity
  {
      get
      {
          if (!m_Counters.IsCreated)
              return 0;
          return m_Counters[2];
      }
  }
  // The tile data array (per-tile light lists).
  public NativeArray<ushort> Tiles => m_TileData;
  // The tile header array (TileHeaderSize uint values per tile).
  public NativeArray<uint> TileHeaders => m_TileHeaders;
  // Reads the first two header values of tile (i, j):
  // offset receives header value [0] (light list offset), count receives header value [1] (light count).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public void GetTileOffsetAndCount(int i, int j, out int offset, out int count)
  {
      int header = GetTileHeaderOffset(i, j);
      uint rawOffset = m_TileHeaders[header];
      uint rawCount = m_TileHeaders[header + 1];
      offset = (int)rawOffset;
      count = (int)rawCount;
  }
  // Index in the tile header array of the first header value of tile (i, j).
  // Tiles are laid out row by row, m_TileHeaderSize values per tile.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public int GetTileHeaderOffset(int i, int j)
  {
      int tileIndex = i + j * m_TileXCount;
      return tileIndex * m_TileHeaderSize;
  }
  // Allocates the counters, tile data and tile headers (Allocator.Temp, uninitialized memory) and resets the counters.
  // tileDataCapacity: number of ushort values to allocate for the tile data; when <= 0 the capacity
  // falls back to tileCount * m_AvgLightPerTile.
  public void Setup(int tileDataCapacity)
  {
      int tileCount = m_TileXCount * m_TileYCount;
      int capacity = tileDataCapacity > 0 ? tileDataCapacity : tileCount * m_AvgLightPerTile;

      m_Counters = new NativeArray<int>(3, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
      m_TileData = new NativeArray<ushort>(capacity, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
      m_TileHeaders = new NativeArray<uint>(tileCount * m_TileHeaderSize, Allocator.Temp, NativeArrayOptions.UninitializedMemory);

      // The counters were allocated uninitialized, so every slot is written explicitly.
      m_Counters[0] = 0;        // maxLightPerTile
      m_Counters[1] = 0;        // tileDataSize
      m_Counters[2] = capacity; // tileDataCapacity
  }
  // Disposes the arrays allocated by Setup(). Arrays that were never created are skipped.
  // m_PreTiles is not disposed here.
  public void OnCameraCleanup()
  {
      if (m_TileHeaders.IsCreated)
      {
          m_TileHeaders.Dispose();
      }

      if (m_TileData.IsCreated)
      {
          m_TileData.Dispose();
      }

      if (m_Counters.IsCreated)
      {
          m_Counters.Dispose();
      }
  }
  // Computes the tile grid for the given render size and fills m_PreTiles with the four side planes of every tile.
  // proj: projection matrix; its decomposed frustum planes are stored (after adjustment) in m_FrustumPlanes.
  // isOrthographic: selects how tile planes are built (see below); also stored in m_IsOrthographic.
  // renderWidth / renderHeight: render target size in pixels.
  public void PrecomputeTiles(Matrix4x4 proj, bool isOrthographic, int renderWidth, int renderHeight)
  {
      m_TileXCount = (renderWidth + m_TilePixelWidth - 1) / m_TilePixelWidth;
      m_TileYCount = (renderHeight + m_TilePixelHeight - 1) / m_TilePixelHeight;
      m_PreTiles = DeferredShaderData.instance.GetPreTiles(m_TilerLevel, m_TileXCount * m_TileYCount);
      m_IsOrthographic = isOrthographic;

      // Tiles have a fixed pixel size, so the last row/column may extend past the screen.
      // Scale factors between the tile-aligned size and the real render size.
      float widthScale = Align(renderWidth, m_TilePixelWidth) / (float)renderWidth;
      float heightScale = Align(renderHeight, m_TilePixelHeight) / (float)renderHeight;

      // Push the right and bottom clipping planes out accordingly.
      FrustumPlanes planes = proj.decomposeProjection;
      planes.right = planes.left + (planes.right - planes.left) * widthScale;
      planes.bottom = planes.top + (planes.bottom - planes.top) * heightScale;
      m_FrustumPlanes = planes;

      // Tile size in world units.
      float tileWidthWS = (planes.right - planes.left) / m_TileXCount;
      float tileHeightWS = (planes.top - planes.bottom) / m_TileYCount;

      // Camera view space is always OpenGL RH coordinates system.
      float nearZ = -planes.zNear;
      // Second depth used to span the orthographic tile planes along the view axis.
      float behindNearZ = -planes.zNear - 1.0f;

      for (int row = 0; row < m_TileYCount; ++row)
      {
          float tileTop = planes.top - tileHeightWS * row;
          float tileBottom = tileTop - tileHeightWS;

          for (int col = 0; col < m_TileXCount; ++col)
          {
              float tileLeft = planes.left + tileWidthWS * col;
              float tileRight = tileLeft + tileWidthWS;

              // Tile corners on the near plane.
              float3 topLeft = new float3(tileLeft, tileTop, nearZ);
              float3 topRight = new float3(tileRight, tileTop, nearZ);
              float3 bottomLeft = new float3(tileLeft, tileBottom, nearZ);
              float3 bottomRight = new float3(tileRight, tileBottom, nearZ);

              PreTile preTile;
              if (isOrthographic)
              {
                  // Orthographic: each plane is built from three points (two near-plane corners and
                  // one corner moved one unit along -Z).
                  preTile.planeLeft = MakePlane(bottomLeft, new float3(tileLeft, tileBottom, behindNearZ), topLeft);
                  preTile.planeRight = MakePlane(topRight, new float3(tileRight, tileTop, behindNearZ), bottomRight);
                  preTile.planeBottom = MakePlane(bottomRight, new float3(tileRight, tileBottom, behindNearZ), bottomLeft);
                  preTile.planeTop = MakePlane(topLeft, new float3(tileLeft, tileTop, behindNearZ), topRight);
              }
              else
              {
                  // Perspective: in view space all planes pass by (0,0,0), so two points are enough.
                  preTile.planeLeft = MakePlane(bottomLeft, topLeft);
                  preTile.planeRight = MakePlane(topRight, bottomRight);
                  preTile.planeBottom = MakePlane(bottomRight, bottomLeft);
                  preTile.planeTop = MakePlane(topLeft, topRight);
              }

              m_PreTiles[col + row * m_TileXCount] = preTile;
          }
      }
  }
  // Culls the candidate lights against every tile in [istart, iend) x [jstart, jend) and writes, for each tile,
  // its 4-uint header and its light list.
  // This differs from CullIntermediateLights in 3 ways:
  // - tile-frustums/light intersection use different algorithm
  // - depth range of the light shape intersecting the tile-frustums is output in the tile list header section
  // - light indices written out are indexing visible_lights, rather than the array of PrePunctualLights.
  //
  // punctualLights: precomputed lights, indexed by the values stored in lightIndices.
  // lightIndices, lightStartIndex, lightCount: the candidates are lightIndices[lightStartIndex .. lightStartIndex + lightCount).
  //
  // Header written per tile:
  //   [0] offset of the tile's block in the tile data (0 when the tile has no light or the data did not fit)
  //   [1] number of lights in the tile (0 when the data did not fit)
  //   [2] two half floats: a in the low 16 bits, b in the high 16 bits (bitIndex = geoDepth * a + b)
  //   [3] bitmask for 2.5D culling
  // Tile data written per tile: the culled visible-light indices, followed by one packed depth range per light
  // (firstBit in the low 8 bits, bitCount shifted left by 8).
  unsafe public void CullFinalLights(ref NativeArray<PrePunctualLight> punctualLights,
      ref NativeArray<ushort> lightIndices, int lightStartIndex, int lightCount,
      int istart, int iend, int jstart, int jend)
  {
      // Interestingly, 2-3% faster when using unsafe arrays.
      PrePunctualLight* _punctualLights = (PrePunctualLight*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(punctualLights);
      ushort* _lightIndices = (ushort*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(lightIndices);
      uint* _tileHeaders = (uint*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_TileHeaders);

      if (lightCount == 0)
      {
          // Nothing to cull: clear the headers of the requested tiles.
          for (int j = jstart; j < jend; ++j)
              for (int i = istart; i < iend; ++i)
              {
                  int headerOffset = GetTileHeaderOffset(i, j);
                  _tileHeaders[headerOffset + 0] = 0;
                  _tileHeaders[headerOffset + 1] = 0;
                  _tileHeaders[headerOffset + 2] = 0;
                  _tileHeaders[headerOffset + 3] = 0;
              }
          return;
      }

      // Store culled lights in temporary buffer. Additionally store depth range of each light for a given tile too.
      // the depth range is a 32bit mask, but packed into a 16bits value since the range of the light is continuous
      // (only need to store first bit enabled, and count of enabled bits).
      ushort* tiles = stackalloc ushort[lightCount * 2];
      float2* depthRanges = stackalloc float2[lightCount];

      int maxLightPerTile = 0; // for stats
      int lightEndIndex = lightStartIndex + lightCount;

      float2 tileSize = new float2((m_FrustumPlanes.right - m_FrustumPlanes.left) / m_TileXCount, (m_FrustumPlanes.top - m_FrustumPlanes.bottom) / m_TileYCount);
      float2 tileExtents = tileSize * 0.5f;
      float2 tileExtentsInv = new float2(1.0f / tileExtents.x, 1.0f / tileExtents.y);

      for (int j = jstart; j < jend; ++j)
      {
          float tileYCentre = m_FrustumPlanes.top - (tileExtents.y + j * tileSize.y);

          for (int i = istart; i < iend; ++i)
          {
              float tileXCentre = m_FrustumPlanes.left + tileExtents.x + i * tileSize.x;

              int culledLightCount = 0;

              // For the current tile's light list, min&max depth range (absolute values).
              float listMinDepth = float.MaxValue;
              float listMaxDepth = -float.MaxValue;

              // Duplicate the inner loop twice. Testing for the orthographic case inside the inner loop would cost an extra 8% otherwise.
              // Missing C++ template argument here!
              if (!m_IsOrthographic)
              {
                  for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
                  {
                      ushort lightIndex = _lightIndices[vi];
                      PrePunctualLight ppl = _punctualLights[lightIndex];

                      // Offset tileCentre toward the light to calculate a more conservative minMax depth bound,
                      // but it must remains inside the tile and must not pass further than the light centre.
                      float2 tileCentre = new float2(tileXCentre, tileYCentre);
                      float2 dir = ppl.screenPos - tileCentre;
                      float2 d = abs(dir * tileExtentsInv);

                      float sInv = 1.0f / max3(d.x, d.y, 1.0f);
                      // Perspective: the line starts at the view-space origin and goes through the offset point on the near plane.
                      float3 tileOffCentre = new float3(tileCentre.x + dir.x * sInv, tileCentre.y + dir.y * sInv, -m_FrustumPlanes.zNear);
                      float3 tileOrigin = new float3(0.0f);

                      float t0, t1;
                      // This is more expensive than Clip() but allow to compute min&max depth range for the part of the light inside the tile.
                      if (!IntersectionLineSphere(ppl.posVS, ppl.radius, tileOrigin, tileOffCentre, out t0, out t1))
                          continue;

                      listMinDepth = listMinDepth < t0 ? listMinDepth : t0;
                      listMaxDepth = listMaxDepth > t1 ? listMaxDepth : t1;
                      depthRanges[culledLightCount] = new float2(t0, t1);
                      // Because this always output to the finest tiles, contrary to CullIntermediateLights(),
                      // the result are indices into visibleLights, instead of indices into punctualLights.
                      tiles[culledLightCount] = ppl.visLightIndex;
                      ++culledLightCount;
                  }
              }
              else
              {
                  for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
                  {
                      ushort lightIndex = _lightIndices[vi];
                      PrePunctualLight ppl = _punctualLights[lightIndex];

                      // Offset tileCentre toward the light to calculate a more conservative minMax depth bound,
                      // but it must remains inside the tile and must not pass further than the light centre.
                      float2 tileCentre = new float2(tileXCentre, tileYCentre);
                      float2 dir = ppl.screenPos - tileCentre;
                      float2 d = abs(dir * tileExtentsInv);

                      float sInv = 1.0f / max3(d.x, d.y, 1.0f);
                      // Orthographic: the line starts at the offset point (z = 0) and runs along (0, 0, -zNear).
                      float3 tileOffCentre = new float3(0, 0, -m_FrustumPlanes.zNear);
                      float3 tileOrigin = new float3(tileCentre.x + dir.x * sInv, tileCentre.y + dir.y * sInv, 0.0f);

                      float t0, t1;
                      // This is more expensive than Clip() but allow to compute min&max depth range for the part of the light inside the tile.
                      if (!IntersectionLineSphere(ppl.posVS, ppl.radius, tileOrigin, tileOffCentre, out t0, out t1))
                          continue;

                      listMinDepth = listMinDepth < t0 ? listMinDepth : t0;
                      listMaxDepth = listMaxDepth > t1 ? listMaxDepth : t1;
                      depthRanges[culledLightCount] = new float2(t0, t1);
                      // Because this always output to the finest tiles, contrary to CullIntermediateLights(),
                      // the result are indices into visibleLights, instead of indices into punctualLights.
                      tiles[culledLightCount] = ppl.visLightIndex;
                      ++culledLightCount;
                  }
              }

              // Post-multiply by zNear to get actual world unit absolute depth values, then clamp to valid depth range.
              listMinDepth = max2(listMinDepth * m_FrustumPlanes.zNear, m_FrustumPlanes.zNear);
              listMaxDepth = min2(listMaxDepth * m_FrustumPlanes.zNear, m_FrustumPlanes.zFar);

              // Calculate bitmask for 2.5D culling.
              uint bitMask = 0;
              float depthRangeInv = 1.0f / (listMaxDepth - listMinDepth);
              for (int culledLightIndex = 0; culledLightIndex < culledLightCount; ++culledLightIndex)
              {
                  float lightMinDepth = max2(depthRanges[culledLightIndex].x * m_FrustumPlanes.zNear, m_FrustumPlanes.zNear);
                  float lightMaxDepth = min2(depthRanges[culledLightIndex].y * m_FrustumPlanes.zNear, m_FrustumPlanes.zFar);
                  int firstBit = (int)((lightMinDepth - listMinDepth) * 32.0f * depthRangeInv);
                  int lastBit = (int)((lightMaxDepth - listMinDepth) * 32.0f * depthRangeInv);
                  int bitCount = min(lastBit - firstBit + 1, 32 - firstBit);
                  bitMask |= (uint)((0xFFFFFFFF >> (32 - bitCount)) << firstBit);

                  // The packed depth ranges are stored right after the culledLightCount light indices.
                  tiles[culledLightCount + culledLightIndex] = (ushort)((uint)firstBit | (uint)(bitCount << 8));
              }

              // As listMinDepth and listMaxDepth are used to calculate the geometry 2.5D bitmask,
              // we can optimize the shader execution (TileDepthInfo.shader) by refactoring the calculation.
              //   int bitIndex = 32.0h * (geoDepth - listMinDepth) / (listMaxDepth - listMinDepth);
              // Equivalent to:
              //   a =                 32.0 / (listMaxDepth - listMinDepth)
              //   b = -listMinDepth * 32.0 / (listMaxDepth - listMinDepth)
              //   int bitIndex = geoDepth * a + b;
              float a = 32.0f * depthRangeInv;
              float b = -listMinDepth * a;

              // AddTileData() resets tileDataSize to 0 when the tile data buffer is full.
              int tileDataSize = culledLightCount * 2;
              int tileOffset = culledLightCount > 0 ? AddTileData(tiles, ref tileDataSize) : 0;

              int headerOffset = GetTileHeaderOffset(i, j);
              _tileHeaders[headerOffset + 0] = (uint)tileOffset;
              _tileHeaders[headerOffset + 1] = (uint)(tileDataSize == 0 ? 0 : culledLightCount);
              _tileHeaders[headerOffset + 2] = _f32tof16(a) | (_f32tof16(b) << 16);
              _tileHeaders[headerOffset + 3] = bitMask;

              maxLightPerTile = max(maxLightPerTile, culledLightCount);
          }
      }

      // Atomic max on the shared counter [0]: a plain read/max/write could let a concurrent caller
      // overwrite a larger value with a smaller one. Retry the compare-and-swap until our value is
      // stored or another caller has already published a value that is at least as large.
      int* _counters = (int*)m_Counters.GetUnsafePtr();
      int observed = _counters[0];
      while (maxLightPerTile > observed)
      {
          int previous = System.Threading.Interlocked.CompareExchange(ref _counters[0], maxLightPerTile, observed);
          if (previous == observed)
              break;
          observed = previous;
      }
  }
  // Culls the candidate lights against every tile in [istart, iend) x [jstart, jend) using the precomputed
  // tile planes, and writes each tile's 2-uint header ([0] list offset, [1] light count) and light list.
  // The indices written are the values read from lightIndices (indices into punctualLights).
  // punctualLights: precomputed lights, indexed by the values stored in lightIndices.
  // lightIndices, lightStartIndex, lightCount: the candidates are lightIndices[lightStartIndex .. lightStartIndex + lightCount).
  // TODO: finer culling for spot lights
  unsafe public void CullIntermediateLights(ref NativeArray<PrePunctualLight> punctualLights,
      ref NativeArray<ushort> lightIndices, int lightStartIndex, int lightCount,
      int istart, int iend, int jstart, int jend)
  {
      // Interestingly, 2-3% faster when using unsafe arrays.
      PrePunctualLight* lights = (PrePunctualLight*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(punctualLights);
      ushort* candidates = (ushort*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(lightIndices);
      uint* headers = (uint*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_TileHeaders);

      if (lightCount == 0)
      {
          // No candidate: every requested tile gets an empty list.
          for (int row = jstart; row < jend; ++row)
          {
              for (int col = istart; col < iend; ++col)
              {
                  int emptyHeader = GetTileHeaderOffset(col, row);
                  headers[emptyHeader + 0] = 0;
                  headers[emptyHeader + 1] = 0;
              }
          }
          return;
      }

      // Scratch buffer receiving the lights kept for the tile being processed.
      ushort* kept = stackalloc ushort[lightCount];
      int lightEndIndex = lightStartIndex + lightCount;

      for (int row = jstart; row < jend; ++row)
      {
          for (int col = istart; col < iend; ++col)
          {
              PreTile preTile = m_PreTiles[col + row * m_TileXCount];
              int keptCount = 0;

              for (int cursor = lightStartIndex; cursor < lightEndIndex; ++cursor)
              {
                  ushort lightIndex = candidates[cursor];
                  PrePunctualLight light = lights[lightIndex];

                  // This is slightly faster than IntersectionLineSphere().
                  if (Clip(ref preTile, light.posVS, light.radius))
                  {
                      kept[keptCount] = lightIndex;
                      ++keptCount;
                  }
              }

              // Copy the culled light list. AddTileData() resets keptCount to 0 when the tile data buffer is full.
              int listOffset = 0;
              if (keptCount > 0)
                  listOffset = AddTileData(kept, ref keptCount);

              int header = GetTileHeaderOffset(col, row);
              headers[header + 0] = (uint)listOffset;
              headers[header + 1] = (uint)keptCount;
          }
      }
  }
  // Appends `size` ushort values from lightData to the tile data array and returns the offset at which
  // they were written.
  // The space is reserved with an atomic add on counter [1] (tileDataSize).
  // When the reservation does not fit in the tile data array, nothing is copied: `size` is set to 0,
  // 0 is returned, and counter [2] (tileDataCapacity) is raised to the size that would have been needed.
  // Note that counter [1] keeps the reserved amount even when the data is dropped.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  unsafe int AddTileData(ushort* lightData, ref int size)
  {
      int* _Counters = (int*)m_Counters.GetUnsafePtr();

      // Interlocked.Add returns the new total, so the reserved block starts `size` values before it.
      int tileDataSize = System.Threading.Interlocked.Add(ref _Counters[1], size);
      int offset = tileDataSize - size;

      if (tileDataSize <= m_TileData.Length)
      {
          ushort* _TileData = (ushort*)m_TileData.GetUnsafePtr();
          UnsafeUtility.MemCpy(_TileData + offset, lightData, size * sizeof(ushort));
          return offset;
      }

      // Buffer overflow. Ignore data to add.
      // Gracefully increasing the buffer size is possible but costs extra CPU time due to the needed
      // critical section, so only the required capacity is recorded.
      // The capacity counter is raised with an atomic max: a plain read/max/write could let a concurrent
      // caller overwrite a larger requirement with a smaller one.
      int observed = _Counters[2];
      while (tileDataSize > observed)
      {
          int previous = System.Threading.Interlocked.CompareExchange(ref _Counters[2], tileDataSize, observed);
          if (previous == observed)
              break;
          observed = previous;
      }

      size = 0;
      return 0;
  }
  // Parametric intersection between a line and a sphere.
  // On success (returns true) the two intersection points are:
  //   P0 = raySource + rayDirection * t0
  //   P1 = raySource + rayDirection * t1
  // Returns false, with t0 and t1 set to 0 (invalid), when the discriminant is not strictly positive.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  unsafe static bool IntersectionLineSphere(float3 centre, float radius, float3 raySource, float3 rayDirection, out float t0, out float t1)
  {
      // Quadratic coefficients, with the linear term already halved.
      float quadratic = dot(rayDirection, rayDirection); // always >= 0
      float halfLinear = dot(raySource - centre, rayDirection);
      float constant = dot(raySource, raySource)
          + dot(centre, centre)
          - (radius * radius)
          - 2 * dot(raySource, centre);

      float discriminant = (halfLinear * halfLinear) - quadratic * constant;

      // Written as a negated comparison so that a NaN discriminant is rejected too.
      if (!(discriminant > 0))
      {
          t0 = 0.0f; // invalid
          t1 = 0.0f; // invalid
          return false;
      }

      float root = sqrt(discriminant);
      float quadraticInv = 1.0f / quadratic;
      t0 = (-halfLinear - root) * quadraticInv;
      t1 = (-halfLinear + root) * quadraticInv;
      return true;
  }
  // Clips a sphere against a 2D tile. Returns true when the sphere is kept for the tile.
  // Only the 4 side planes are tested: zNear and zFar are ignored, as presumably the light is
  // already visible to the camera frustum.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static bool Clip(ref PreTile tile, float3 posVS, float radius)
  {
      float radiusSq = radius * radius;
      int insideCount = 0;

      // Test the planes in order left, right, top, bottom; stop at the first conclusive answer.
      ClipResult verdict = ClipPartial(tile.planeLeft, tile.planeBottom, tile.planeTop, posVS, radius, radiusSq, ref insideCount);
      if (verdict == ClipResult.Unknown)
          verdict = ClipPartial(tile.planeRight, tile.planeBottom, tile.planeTop, posVS, radius, radiusSq, ref insideCount);
      if (verdict == ClipResult.Unknown)
          verdict = ClipPartial(tile.planeTop, tile.planeLeft, tile.planeRight, posVS, radius, radiusSq, ref insideCount);
      if (verdict == ClipResult.Unknown)
          verdict = ClipPartial(tile.planeBottom, tile.planeLeft, tile.planeRight, posVS, radius, radiusSq, ref insideCount);

      // No plane was conclusive: keep the sphere only if its centre was on the inner side of all 4 planes.
      if (verdict == ClipResult.Unknown)
          return insideCount == 4;

      return verdict == ClipResult.In;
  }
  // Clips a sphere against 1 plane of a cube, using 2 additional side planes for false-positive detection
  // (normally 4 side planes, but near and far planes are ignored).
  // Returns Out when the sphere is completely outside `plane`, In when it straddles `plane` and passes the
  // side-plane checks, and Unknown otherwise. insideCount is incremented when the centre is not on the
  // negative side of `plane`.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static ClipResult ClipPartial(float4 plane, float4 sidePlaneA, float4 sidePlaneB, float3 posVS, float radius, float radiusSq, ref int insideCount)
  {
      float signedDist = DistanceToPlane(plane, posVS);

      // Completely outside.
      if (signedDist + radius <= 0.0f)
          return ClipResult.Out;

      // Consider as good as completely inside (negated comparison keeps NaN on this path).
      if (!(signedDist < 0.0f))
      {
          ++insideCount;
          return ClipResult.Unknown;
      }

      // Intersection: only the case where more than half the sphere is outside needs a further check.
      // Project the centre onto the plane and compare against the side planes with squared distances.
      float3 onPlane = posVS - plane.xyz * signedDist;
      float sectionRadiusSq = radiusSq - signedDist * signedDist;
      bool accepted = SignedSq(DistanceToPlane(sidePlaneA, onPlane)) >= -sectionRadiusSq
          && SignedSq(DistanceToPlane(sidePlaneB, onPlane)) >= -sectionRadiusSq;

      return accepted ? ClipResult.In : ClipResult.Unknown;
  }
  // Builds the plane (normal.xyz, distance) containing the origin and the two given points.
  // The normal is normalize(cross(pb, pc)); the distance term is 0 because the plane passes by the origin.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float4 MakePlane(float3 pb, float3 pc)
  {
      float3 normal = normalize(cross(pb, pc));
      return new float4(normal, 0.0f);
  }
  // Builds the plane (normal.xyz, distance) containing the three given points.
  // The normal is normalize(cross(pb - pa, pc - pa)); the distance term is -dot(normal, pa).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float4 MakePlane(float3 pa, float3 pb, float3 pc)
  {
      float3 normal = normalize(cross(pb - pa, pc - pa));
      float distance = -dot(normal, pa);
      return new float4(normal, distance);
  }
  // Evaluates the plane equation at p: plane.xyz . p + plane.w.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float DistanceToPlane(float4 plane, float3 p)
  {
      float alongNormal = plane.x * p.x + plane.y * p.y + plane.z * p.z;
      return alongNormal + plane.w;
  }
  // Square of f that keeps the sign of f (negative inputs give a negative result).
  // Mathf.Sign(f) * (f * f) would be equivalent but is slower.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float SignedSq(float f)
  {
      float squared = f * f;
      return f < 0.0f ? -squared : squared;
  }
  // Minimum of two floats without NaN handling.
  // Unity.Mathematics.min() function calls Single_IsNan() which significantly slow down the code (up to 20% of CullFinalLights())!
  // Returns b whenever the comparison a < b is false (including when either value is NaN).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float min2(float a, float b)
  {
      if (a < b)
          return a;
      return b;
  }
  // Maximum of two floats without NaN handling.
  // Unity.Mathematics.max() function calls Single_IsNan() which significantly slow down the code (up to 20% of CullFinalLights())!
  // Returns b whenever the comparison a > b is false (including when either value is NaN).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float max2(float a, float b)
  {
      if (a > b)
          return a;
      return b;
  }
  // Maximum of three floats, using plain comparisons (no NaN handling).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static float max3(float a, float b, float c)
  {
      float largerOfAB = a > b ? a : b;
      return largerOfAB > c ? largerOfAB : c;
  }
  // Converts a 32-bit float to a 16-bit half float, returned in the low 16 bits of a uint.
  // This is copy-pasted from Unity.Mathematics.math.f32tof16(), but use min2() function that does not check
  // for NaN (which would consume 10% of the execution time of CullFinalLights()).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static uint _f32tof16(float x)
  {
      const int infinity_32 = 255 << 23;
      const uint msk = 0x7FFFF000u;

      uint bits = asuint(x);
      // Input with the sign bit and the lowest 12 bits cleared.
      uint magnitude = bits & msk;

      // Clamp to signed infinity if overflowed.
      float scaled = min2(asfloat(magnitude) * 1.92592994e-34f, 260042752.0f);
      uint h = (uint)(asuint(scaled) + 0x1000) >> 13;

      // NaN->qNaN and Inf->Inf
      bool isInfOrNaN = (int)magnitude >= infinity_32;
      bool isNaN = (int)magnitude > infinity_32;
      h = select(h, select(0x7c00u, 0x7e00u, isNaN), isInfOrNaN);

      // Bits excluded by msk, shifted down by 16: this moves the sign bit (bit 31) to bit 15.
      uint sign = (bits & ~msk) >> 16;
      return h | sign;
  }
  // Rounds s up to the next multiple of alignment (integer arithmetic).
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  static int Align(int s, int alignment)
  {
      int blockCount = (s + alignment - 1) / alignment;
      return blockCount * alignment;
  }
  580. }
  581. }