168
社区成员




本人长期开发、维护OpenCL开源可导光线追踪渲染器项目LibreDR https://codeberg.org/ybh1998/LibreDR 。最近在尝试适配摩尔线程s80显卡,在迁移摩尔线程s80时遇到编译kernel时驱动崩溃问题,详情如下:
本人在Ubuntu 20.04下分别尝试了musa_2.5.0和musa_2.7.1-rc3版本驱动,将kernel源代码使用clCreateProgramWithSource传入OpenCL驱动,kernel代码在附件“debug_kernel.cl”中。调用clBuildProgram编译kernel时程序发生segmentation fault,backtrace如附件“backtrace.png”中所示,显示segmentation fault发生在OpenCL驱动libufwriter_MUSA.so中,但由于缺失debug symbol,不能确定具体位置。
本人发现在clinfo中,摩尔线程显卡汇报支持cl_khr_spir扩展。本人尝试严格按照 https://github.com/KhronosGroup/SPIR 描述的标准编译SPIR程序,使用clCreateProgramWithBinary传入OpenCL驱动,在clBuildProgram时返回CL_DEVICE_NOT_AVAILABLE错误信息。
1. 使用源代码传入kernel时,由于segmentation fault发生在驱动代码内,是否方便处理相关驱动代码的问题?如果方便提供libufwriter_MUSA.so相关代码的debug symbol,本人可以协助调试;
2. 另外想问摩尔线程s80显卡OpenCL是否支持SPIR输入程序?如果支持,能否提供相应的使用样例?
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
typedef struct {
float3 r, rd, rn, render, render_weight;
float3 render_last, render_last_weight;
float2 rt;
float pdf;
ulong2 pcg32_state;
int ri, hit_cnt;
} Ray;
typedef struct __attribute__((__packed__)) {
const float vertex[8][3][170u];
const float uv_xyz[4][256u][256u];
const int bvh_left[86u], bvh_right[86u], bvh_next[86u];
const float bvh_corner[3][2][86u];
} GeometryBuffer;
typedef struct __attribute__((__packed__)) {
float ray[27u][262144u];
const float texture[14][256u][256u];
const uint texture_cdf[256u][256u];
float d_texture[14][256u][256u][8u];
const float envmap[3][6][512u][512u];
const uint envmap_cdf[6][512u][512u];
float d_envmap[3][6][512u][512u][8u];
} RenderBuffer;
typedef struct __attribute__((__packed__)) {
float ray_r[3][2u][262144u][128u];
uint ray_ri[2u][262144u][128u];
float ray_pdf[2u][262144u][128u];
float ray_render_last[3][262144u][128u];
} IntermediateBuffer;
uint pcg32_next(Ray *ray) {
ulong old_state = ray->pcg32_state.x;
ray->pcg32_state.x = old_state * 0x5851f42d4c957f2dul + ray->pcg32_state.y;
uint xor_shifted = ((old_state >> 18u) ^ old_state) >> 27u;
uint rot = old_state >> 59u;
return (xor_shifted >> rot) | (xor_shifted << ((-rot) & 31));
}
float pcg32_next_float(Ray *ray) {
union {
uint u;
float f;
} x;
x.u = (pcg32_next(ray) >> 9) | 0x3f800000u;
return x.f - 1.f;
}
void pcg32_init(Ray *ray, ulong seed, ulong inc) {
ray->pcg32_state.x = 0ul;
ray->pcg32_state.y = (inc << 1u) | 1u;
pcg32_next(ray);
ray->pcg32_state.x += (0x853c49e6748fea9bul + seed);
pcg32_next(ray);
}
float halton_float(const uint low_discrepancy, const uint base) {
const uint PRIMES[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89};
const uint prime_base = PRIMES[base];
float f = 1, r = 0;
for (uint index = get_global_id(1) + low_discrepancy + 1; index > 0; index /= prime_base) {
f /= prime_base;
r += f * (index % prime_base);
}
return r;
}
uint halton(const uint low_discrepancy, const uint base) {
return halton_float(low_discrepancy, base) * (float)UINT_MAX;
}
void atomic_add_f(volatile global float *source, const float operand) {
if (operand == 0.f)
return;
union {
uint u32;
float f32;
} next, expected, current;
current.f32 = *source;
do {
expected.f32 = current.f32;
next.f32 = expected.f32 + operand;
current.u32 = atomic_cmpxchg((volatile __global uint *)source, expected.u32, next.u32);
} while (current.u32 != expected.u32);
}
void local_gather_atomic_add_f (volatile global float *source, local float *operand_gather, const float operand) {
operand_gather[get_local_id(1)] = operand;
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = get_local_size(1) / 2; i > 0; i >>= 1) {
if (get_local_id(1) < i)
operand_gather[get_local_id(1)] += operand_gather[get_local_id(1) + i];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(1) == 0)
atomic_add_f(source, operand_gather[0]);
barrier(CLK_LOCAL_MEM_FENCE);
}
void d_material_normal(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
const uint2 texture_index,
const int i_bounce,
const float3 d_normal) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_texture[0][texture_index.x][texture_index.y][i_bucket], d_normal.x);
atomic_add_f(&render_buffer->d_texture[1][texture_index.x][texture_index.y][i_bucket], d_normal.y);
atomic_add_f(&render_buffer->d_texture[2][texture_index.x][texture_index.y][i_bucket], d_normal.z);
}
float3 material_diffuse(
const volatile global RenderBuffer *render_buffer,
const uint2 texture_index,
const int i_bounce) {
return (float3)(render_buffer->texture[3][texture_index.x][texture_index.y],
render_buffer->texture[4][texture_index.x][texture_index.y],
render_buffer->texture[5][texture_index.x][texture_index.y]);
}
void d_material_diffuse(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
const uint2 texture_index,
const int i_bounce,
const float3 d_diffuse) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_texture[3][texture_index.x][texture_index.y][i_bucket], d_diffuse.x);
atomic_add_f(&render_buffer->d_texture[4][texture_index.x][texture_index.y][i_bucket], d_diffuse.y);
atomic_add_f(&render_buffer->d_texture[5][texture_index.x][texture_index.y][i_bucket], d_diffuse.z);
}
float3 material_specular(
const volatile global RenderBuffer *render_buffer,
const uint2 texture_index,
const int i_bounce) {
return (float3)(render_buffer->texture[6][texture_index.x][texture_index.y],
render_buffer->texture[7][texture_index.x][texture_index.y],
render_buffer->texture[8][texture_index.x][texture_index.y]);
}
void d_material_specular(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
const uint2 texture_index,
const int i_bounce,
const float3 d_specular) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_texture[6][texture_index.x][texture_index.y][i_bucket], d_specular.x);
atomic_add_f(&render_buffer->d_texture[7][texture_index.x][texture_index.y][i_bucket], d_specular.y);
atomic_add_f(&render_buffer->d_texture[8][texture_index.x][texture_index.y][i_bucket], d_specular.z);
}
float material_roughness(
const volatile global RenderBuffer *render_buffer,
const uint2 texture_index,
const int i_bounce) {
return render_buffer->texture[9][texture_index.x][texture_index.y];
}
void d_material_roughness(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
const uint2 texture_index,
const int i_bounce,
const float d_roughness) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_texture[9][texture_index.x][texture_index.y][i_bucket], d_roughness);
}
float phong_roughness_to_shininess(const float roughness) {
return 2.f / roughness - 2.f;
}
float d_phong_roughness_to_shininess(const float roughness, const float d_shininess) {
return -2.f * d_shininess / pown(roughness, 2);
}
float specular_D_beckmann(const float3 rn, const float3 h, const float roughness) {
const float alpha = roughness / 4.f;
const float n_dot_h = dot(rn, h);
return exp((pown(n_dot_h, 2) - 1.f) / (alpha * pown(n_dot_h, 2))) /
(4 * M_PI_F * alpha * pown(n_dot_h, 4));
}
void d_specular_D_beckmann(
const float3 rn,
const float3 h,
const float roughness,
const float d_D,
float3 *d_normal,
float *d_roughness) {
const float alpha = roughness / 4.f;
const float n_dot_h = dot(rn, h);
const float d_n_dot_h = d_D * exp((pown(n_dot_h, 2) - 1.f) / (alpha * pown(n_dot_h, 2))) *
(1.f - 2.f * alpha * pown(n_dot_h, 2)) / (2.f * M_PI_F * pown(alpha, 2) * pown(n_dot_h, 7));
*d_normal += d_n_dot_h * h;
const float d_alpha = d_D * exp((pown(n_dot_h, 2) - 1.f) / (alpha * pown(n_dot_h, 2))) *
(1.f - (alpha + 1.f) * pown(n_dot_h, 2)) / (4.f * M_PI_F * pown(n_dot_h, 6) * pow(alpha, 3));
*d_roughness += d_alpha / 4.f;
}
int bin_search(const global uint *cdf, const uint size, const uint value) {
int a = -1, b = size - 1;
while (a + 1 < b) {
int m = (a + b) / 2;
if (cdf[m] < value)
a = m;
else
b = m;
}
return b;
}
float3 envmap_offset_to_rd(const uint envmap_offset) {
const uint envmap_face_id = envmap_offset / (512u * 512u);
const uint envmap_row = (envmap_offset / 512u) % 512u;
const uint envmap_col = envmap_offset % 512u;
const float2 background_uv = (float2)(
(envmap_col + 0.5f) / 512u,
(512u - 0.5f - envmap_row) / 512u) * 2.f - 1.f;
float3 next_rd;
switch (envmap_face_id / 2) {
case 0:
next_rd.x = (envmap_face_id % 2) ? -1.f : 1.f;
next_rd.y = background_uv.x;
next_rd.z = background_uv.y;
break;
case 1:
next_rd.x = background_uv.x;
next_rd.y = (envmap_face_id % 2) ? -1.f : 1.f;
next_rd.z = background_uv.y;
break;
case 2:
next_rd.x = background_uv.x;
next_rd.y = background_uv.y;
next_rd.z = (envmap_face_id % 2) ? -1.f : 1.f;
break;
}
next_rd /= sqrt(dot(next_rd, next_rd));
return next_rd;
}
float3 sample_reflection(
const global GeometryBuffer *geometry_buffer,
const volatile global RenderBuffer *render_buffer,
Ray *ray,
const uint2 texture_index,
int i_bounce,
uint low_discrepancy) {
const float3 r = ray->rd - 2 * dot(ray->rd, ray->rn) * ray->rn;
const uint w = i_bounce < 0 ? halton(low_discrepancy, i_bounce * 3 + 2) : pcg32_next(ray);
float tmp;
float3 next_rd = (float3)(0.f, 0.f, 0.f);
if (w < 1073741826u + 1073741823u) {
float3 n_coord[3];
n_coord[2] = ray->rn;
n_coord[0] = cross((float3)(0.f, 0.f, 1.f), n_coord[2]);
tmp = dot(n_coord[0], n_coord[0]);
if (tmp < 1e-3f) {
n_coord[0] = cross((float3)(0.f, 1.f, 0.f), n_coord[2]);
tmp = dot(n_coord[0], n_coord[0]);
}
if (tmp < 1e-3f) {
n_coord[0] = cross((float3)(1.f, 0.f, 0.f), n_coord[2]);
tmp = dot(n_coord[0], n_coord[0]);
}
n_coord[0] /= sqrt(tmp);
n_coord[1] = cross(n_coord[0], n_coord[2]);
n_coord[1] /= sqrt(dot(n_coord[1], n_coord[1]));
const float sample_a =
i_bounce < 0 ? halton_float(low_discrepancy, i_bounce * 3 + 3) : pcg32_next_float(ray);
const float sample_b =
i_bounce < 0 ? halton_float(low_discrepancy, i_bounce * 3 + 4) : pcg32_next_float(ray);
const float2 sample = (float2)(sample_a, sample_b);
const float sin_phi = sin(2.f * M_PI_F * sample.x);
const float cos_phi = cos(2.f * M_PI_F * sample.x);
if (w <= 1073741826u) {
tmp = sqrt(1.f - sample.y);
const float3 local_dir = (float3)(cos_phi * tmp, sin_phi * tmp, sqrt(sample.y));
next_rd = local_dir.x * n_coord[0] + local_dir.y * n_coord[1] + local_dir.z * n_coord[2];
} else {
float3 r_coord[3];
r_coord[2] = r;
r_coord[0] = cross((float3)(0.f, 0.f, 1.f), r_coord[2]);
tmp = dot(r_coord[0], r_coord[0]);
if (tmp < 1e-3f) {
r_coord[0] = cross((float3)(0.f, 1.f, 0.f), r_coord[2]);
tmp = dot(r_coord[0], r_coord[0]);
}
if (tmp < 1e-3f) {
r_coord[0] = cross((float3)(1.f, 0.f, 0.f), r_coord[2]);
tmp = dot(r_coord[0], r_coord[0]);
}
r_coord[0] /= sqrt(tmp);
r_coord[1] = cross(r_coord[0], r_coord[2]);
r_coord[1] /= sqrt(dot(r_coord[1], r_coord[1]));
const float roughness = material_roughness(render_buffer, texture_index, i_bounce);
const float shininess = phong_roughness_to_shininess(roughness);
const float cos_theta = pow(sample.y, 1.f / (shininess + 1.f));
const float sin_theta = sqrt(1.f - cos_theta * cos_theta);
const float3 local_dir = (float3)(cos_phi * sin_theta, sin_phi * sin_theta, cos_theta);
next_rd = local_dir.x * r_coord[0] + local_dir.y * r_coord[1] + local_dir.z * r_coord[2];
if (dot(next_rd, ray->rn) < 0) {
next_rd -= 2 * dot(next_rd, ray->rn) * ray->rn;
}
}
} else if (w < 1073741826u + 1073741823u + 1073741823u) {
const uint texture_offset = bin_search(
(const global uint *)render_buffer->texture_cdf,
256u * 256u,
i_bounce < 0 ? halton(low_discrepancy, i_bounce * 3 + 3) : pcg32_next(ray));
const uint2 texture_index = (uint2)(texture_offset / 256u, texture_offset % 256u);
const float3 look_at = (float3)(
geometry_buffer->uv_xyz[0][texture_index.x][texture_index.y],
geometry_buffer->uv_xyz[1][texture_index.x][texture_index.y],
geometry_buffer->uv_xyz[2][texture_index.x][texture_index.y]);
next_rd = look_at - ray->r;
const float dist = sqrt(dot(next_rd, next_rd));
if (dist < 1e-3f) {
ray->render_weight = (float3)(0.f, 0.f, 0.f);
next_rd = (float3)(0.f, 1.f, 0.f);
} else {
next_rd /= dist;
}
} else {
const uint envmap_offset = bin_search(
(const global uint *)render_buffer->envmap_cdf,
6 * 512u * 512u,
i_bounce < 0 ? halton(low_discrepancy, i_bounce * 3 + 3) : pcg32_next(ray));
next_rd = envmap_offset_to_rd(envmap_offset);
}
return next_rd;
}
float3 diffuse_brdf(
const volatile global RenderBuffer *render_buffer,
Ray *ray,
const float3 next_rd,
const uint2 texture_index,
const int i_bounce) {
float3 brdf = (float3)(0.f, 0.f, 0.f);
if (dot(ray->rn, next_rd) < 1e-3f)
return brdf;
const float3 diffuse = material_diffuse(render_buffer, texture_index, i_bounce);
brdf = diffuse * dot(ray->rn, next_rd) / M_PI_F;
return brdf;
}
void d_diffuse_brdf(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
Ray *ray,
float3 next_rd,
const uint2 texture_index,
const int i_bounce,
float3 d_brdf) {
float3 d_diffuse = (float3)(0.f, 0.f, 0.f);
float3 d_normal = (float3)(0.f, 0.f, 0.f);
if (fabs(d_brdf.x) < 1e-6f && fabs(d_brdf.y) < 1e-6f && fabs(d_brdf.z) < 1e-6f)
goto d_return;
if (dot(ray->rn, next_rd) < 1e-3f)
goto d_return;
d_diffuse += d_brdf * dot(ray->rn, next_rd) / M_PI_F;
const float3 diffuse = material_diffuse(render_buffer, texture_index, i_bounce);
d_normal += next_rd * dot(d_brdf, diffuse) / M_PI_F;
d_return:
d_material_diffuse(render_buffer, operand_gather, texture_index, i_bounce, d_diffuse);
d_material_normal(render_buffer, operand_gather, texture_index, i_bounce, d_normal);
}
float3 specular_brdf(
const volatile global RenderBuffer *render_buffer,
Ray *ray,
const float3 next_rd,
const uint2 texture_index,
const int i_bounce) {
float3 brdf = (float3)(0.f, 0.f, 0.f);
if (dot(ray->rn, next_rd) < 1e-3f)
return brdf;
const float3 specular = material_specular(render_buffer, texture_index, i_bounce);
const float roughness = material_roughness(render_buffer, texture_index, i_bounce);
// TODO: Shall we move the data EPS check to rust code?
if (roughness < 1e-6f)
return brdf;
float3 h = next_rd - ray->rd;
if (dot(h, h) < 1e-3f)
return brdf;
h /= sqrt(dot(h, h));
if (dot(h, -ray->rd) < 1e-3f || dot(h, ray->rn) < 1e-3f)
return brdf;
const float3 F = specular + (1.f - specular) * pown(1.f - dot(ray->rn, -ray->rd), 5);
const float G1 = 2.f * dot(h, ray->rn) * dot(ray->rn, -ray->rd) / dot(h, -ray->rd);
const float G2 = 2.f * dot(h, ray->rn) * dot(ray->rn, next_rd) / dot(h, -ray->rd);
const float G = min(1.f, min(G1, G2));
const float D = specular_D_beckmann(ray->rn, h, roughness);
brdf = D * G * F / dot(ray->rn, -ray->rd);
return brdf;
}
void d_specular_brdf(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
Ray *ray,
float3 next_rd,
const uint2 texture_index,
const int i_bounce,
float3 d_brdf) {
float3 d_specular = (float3)(0.f, 0.f, 0.f);
float d_roughness = 0.f;
float3 d_normal = (float3)(0.f, 0.f, 0.f);
if (fabs(d_brdf.x) < 1e-6f && fabs(d_brdf.y) < 1e-6f && fabs(d_brdf.z) < 1e-6f)
goto d_return;
if (dot(ray->rn, next_rd) < 1e-3f)
goto d_return;
const float3 specular = material_specular(render_buffer, texture_index, i_bounce);
float roughness = material_roughness(render_buffer, texture_index, i_bounce);
// TODO: Shall we move the data EPS check to rust code?
if (roughness < 1e-6f)
goto d_return;
float3 h = next_rd - ray->rd;
if (dot(h, h) < 1e-3f)
goto d_return;
h /= sqrt(dot(h, h));
if (dot(h, -ray->rd) < 1e-3f || dot(h, ray->rn) < 1e-3f)
goto d_return;
const float3 F = specular + (1.f - specular) * pown(1.f - dot(ray->rn, -ray->rd), 5);
const float G1 = 2.f * dot(h, ray->rn) * dot(ray->rn, -ray->rd) / dot(h, -ray->rd);
const float G2 = 2.f * dot(h, ray->rn) * dot(ray->rn, next_rd) / dot(h, -ray->rd);
const float G = min(1.f, min(G1, G2));
const float D = specular_D_beckmann(ray->rn, h, roughness);
const float d_D = dot(d_brdf, F) * G / dot(ray->rn, -ray->rd);
float3 d_n_dot_v = -dot(d_brdf, F) * D * G / pown(dot(ray->rn, -ray->rd), 2);
const float3 d_F = d_brdf * D * G / dot(ray->rn, -ray->rd);
d_specular = d_F * (1.f - pown(1.f - dot(ray->rn, -ray->rd), 5));
d_n_dot_v += -5 * dot(d_F, 1.f - specular) * pown(1.f - dot(ray->rn, -ray->rd), 4);
d_normal += d_n_dot_v * -ray->rd;
const float d_G = dot(d_brdf, F) * D / dot(ray->rn, -ray->rd);
if (G1 < 1.f && G1 <= G2) {
d_normal += 2.f * d_G * (dot(h, ray->rn) * -ray->rd + h * dot(ray->rn, -ray->rd)) / dot(h, -ray->rd);
}
if (G2 < 1.f && G2 < G1) {
d_normal += 2.f * d_G * (dot(h, ray->rn) * next_rd + h * dot(ray->rn, next_rd)) / dot(h, -ray->rd);
}
d_specular_D_beckmann(ray->rn, h, roughness, d_D, &d_normal, &d_roughness);
d_return:
d_material_specular(render_buffer, operand_gather, texture_index, i_bounce, d_specular);
d_material_roughness(render_buffer, operand_gather, texture_index, i_bounce, d_roughness);
d_material_normal(render_buffer, operand_gather, texture_index, i_bounce, d_normal);
}
float diffuse_pdf(Ray *ray, const float3 next_rd) {
const float pdf = max(dot(ray->rn, next_rd), 0.f) / M_PI_F;
return 0.25000000052386895f * pdf;
}
float specular_pdf(
const volatile global RenderBuffer *render_buffer,
Ray *ray,
const float3 next_rd,
const uint2 texture_index,
const int i_bounce) {
float pdf = 0.f;
if (dot(ray->rn, next_rd) < 1e-3f)
return pdf;
const float roughness = material_roughness(render_buffer, texture_index, i_bounce);
const float shininess = phong_roughness_to_shininess(roughness);
const float3 r = ray->rd - 2 * dot(ray->rd, ray->rn) * ray->rn;
float r_dot_wi = max(dot(r, next_rd - 2 * dot(next_rd, ray->rn) * ray->rn), 0.f);
pdf += pow(r_dot_wi, shininess) * (shininess + 1.f) / (2.f * M_PI_F);
r_dot_wi = max(dot(r, next_rd), 0.f);
pdf += pow(r_dot_wi, shininess) * (shininess + 1.f) / (2.f * M_PI_F);
return 0.24999999982537702f * pdf;
}
float float3_access(const float3 data, const uint idx) {
if (idx == 0)
return data.x;
if (idx == 1)
return data.y;
return data.z;
}
float envmap_pdf(const global uint *cdf, const Ray *ray, const uint3 envmap_index) {
uint envmap_offset = envmap_index.x * 512u * 512u +
envmap_index.y * 512u + envmap_index.z;
uint pdf = cdf[envmap_offset];
if (envmap_offset > 0)
pdf -= cdf[envmap_offset - 1];
float n_dot_v = max(fabs(ray->rd.x), max(fabs(ray->rd.y), fabs(ray->rd.z)));
return 3.814697263848643e-6f * pdf / pown(n_dot_v, 3);
}
void background_intersect(const volatile global RenderBuffer *render_buffer, Ray *ray, uint3 *envmap_index) {
uint face_id = 0;
const float3 rd_abs = fabs(ray->rd);
if (rd_abs.x > rd_abs.y) {
if (rd_abs.x > rd_abs.z)
face_id = 0;
else
face_id = 2;
} else {
if (rd_abs.y > rd_abs.z)
face_id = 1;
else
face_id = 2;
}
const float3 rd = ray->rd / float3_access(rd_abs, face_id);
float2 uv;
switch (face_id) {
case 0:
uv = (float2)(rd.y, rd.z);
break;
case 1:
uv = (float2)(rd.x, rd.z);
break;
case 2:
uv = (float2)(rd.x, rd.y);
break;
}
uv = (uv + 1.f) / 2.f;
if (float3_access(ray->rd, face_id) < 0)
face_id = face_id * 2 + 1;
else
face_id = face_id * 2;
uv = clamp(uv, 1e-6f, 1.f - 1e-6f);
envmap_index->x = face_id;
envmap_index->y = 512u - 1 - floor(uv.y * 512u);
envmap_index->z = floor(uv.x * 512u);
ray->pdf += envmap_pdf((const global uint *)render_buffer->envmap_cdf, ray, *envmap_index);
}
float triangle_intersect(const global GeometryBuffer *geometry_buffer, Ray *ray, const int triangle_id) {
const float3 tri_v0 = (float3)(
geometry_buffer->vertex[0][0][triangle_id],
geometry_buffer->vertex[1][0][triangle_id],
geometry_buffer->vertex[2][0][triangle_id]);
const float3 u = (float3)(
geometry_buffer->vertex[0][1][triangle_id],
geometry_buffer->vertex[1][1][triangle_id],
geometry_buffer->vertex[2][1][triangle_id]) - tri_v0;
const float3 v = (float3)(
geometry_buffer->vertex[0][2][triangle_id],
geometry_buffer->vertex[1][2][triangle_id],
geometry_buffer->vertex[2][2][triangle_id]) - tri_v0;
const float3 fn = cross(u, v);
const float3 w0 = ray->r - tri_v0;
const float a = -dot(fn, w0);
const float b = dot(fn, ray->rd);
if (fabs(b) < 1e-12f) return MAXFLOAT;
const float r = a / b;
if (r <= 0) return MAXFLOAT;
const float3 I = ray->r + ray->rd * r;
const float uu = dot(u, u);
const float uv = dot(u, v);
const float vv = dot(v, v);
const float3 w = I - tri_v0;
const float wu = dot(w, u);
const float wv = dot(w, v);
const float D = uu * vv - uv * uv;
const float D_min = -1e-5f * D;
const float D_max = D - D_min;
const float s = vv * wu - uv * wv;
if (s < D_min || s > D_max) return MAXFLOAT;
const float t = uu * wv - uv * wu;
if (t < D_min || (s + t) > D_max) return MAXFLOAT;
return r;
}
bool aabb_intersect(const global GeometryBuffer *geometry_buffer, Ray *ray, const int bvh_id) {
const float3 corner[2] = {
(float3)(
geometry_buffer->bvh_corner[0][0][bvh_id],
geometry_buffer->bvh_corner[1][0][bvh_id],
geometry_buffer->bvh_corner[2][0][bvh_id]),
(float3)(
geometry_buffer->bvh_corner[0][1][bvh_id],
geometry_buffer->bvh_corner[1][1][bvh_id],
geometry_buffer->bvh_corner[2][1][bvh_id])
};
const float3 dir_frac = 1.0f / ray->rd;
const float3 tmp[2] = {
(corner[0] - ray->r) * dir_frac,
(corner[1] - ray->r) * dir_frac
};
const float t_min = max(max(min(tmp[0].x, tmp[1].x), min(tmp[0].y, tmp[1].y)), min(tmp[0].z, tmp[1].z));
const float t_max = min(min(max(tmp[0].x, tmp[1].x), max(tmp[0].y, tmp[1].y)), max(tmp[0].z, tmp[1].z));
return t_max >= t_min;
}
float update_ray(
const global GeometryBuffer *geometry_buffer,
Ray *ray,
const float d,
const int triangle_id,
uint2 *texture_index) {
const float3 next_r = ray->r + ray->rd * d;
const float3 tri_v0 = (float3)(
geometry_buffer->vertex[0][0][triangle_id],
geometry_buffer->vertex[1][0][triangle_id],
geometry_buffer->vertex[2][0][triangle_id]);
const float3 tri_v1 = (float3)(
geometry_buffer->vertex[0][1][triangle_id],
geometry_buffer->vertex[1][1][triangle_id],
geometry_buffer->vertex[2][1][triangle_id]);
const float3 tri_v2 = (float3)(
geometry_buffer->vertex[0][2][triangle_id],
geometry_buffer->vertex[1][2][triangle_id],
geometry_buffer->vertex[2][2][triangle_id]);
const float2 tri_vt0 = (float2)(
geometry_buffer->vertex[6][0][triangle_id],
geometry_buffer->vertex[7][0][triangle_id]);
const float2 tri_vt1 = (float2)(
geometry_buffer->vertex[6][1][triangle_id],
geometry_buffer->vertex[7][1][triangle_id]);
const float2 tri_vt2 = (float2)(
geometry_buffer->vertex[6][2][triangle_id],
geometry_buffer->vertex[7][2][triangle_id]);
const float3 u_cross_v = cross(tri_v1 - tri_v0, tri_v2 - tri_v0);
const float area_v = sqrt(dot(u_cross_v, u_cross_v));
const float area_vt = fabs((tri_vt1 - tri_vt0).x * (tri_vt2 - tri_vt0).y -
(tri_vt1 - tri_vt0).y * (tri_vt2 - tri_vt0).x);
float w = dot(cross(tri_v1 - tri_v0, next_r - tri_v0), u_cross_v) / dot(u_cross_v, u_cross_v);
float v = dot(cross(next_r - tri_v0, tri_v2 - tri_v0), u_cross_v) / dot(u_cross_v, u_cross_v);
w = max(1e-6f, min(1.f - 1e-6f, w));
v = max(1e-6f, min(1.f - 1e-6f - w, v));
float u = 1.f - w - v;
ray->rt = tri_vt0 * u + tri_vt1 * v + tri_vt2 * w;
ray->rt = clamp(ray->rt, 1e-6f, 1.f - 1e-6f);
texture_index->x = 256u - 1 - floor(ray->rt.y * 256u);
texture_index->y = floor(ray->rt.x * 256u);
ray->rn = u_cross_v / sqrt(dot(u_cross_v, u_cross_v));
if (dot(ray->rd, ray->rn) > 0)
ray->rn = -ray->rn;
return area_vt / area_v;
}
float texture_pdf(
const global uint *texture_cdf,
const Ray *ray,
const float d,
const float texture_area_ratio,
const uint2 texture_index) {
const uint texture_offset = texture_index.x * 256u + texture_index.y;
uint pdf = texture_cdf[texture_offset];
if (texture_offset > 0)
pdf -= texture_cdf[texture_offset - 1];
float n_dot_v = fabs(dot(ray->rd, ray->rn));
if (n_dot_v < 1e-3f)
return MAXFLOAT;
return 3.814697263848643e-6f * pdf * texture_area_ratio * pown(d, 2) / n_dot_v;
}
void any_hit(
const global GeometryBuffer *geometry_buffer,
const global uint *texture_cdf,
Ray *ray,
const float d,
const int triangle_id,
uint2 *texture_index) {
const float texture_area_ratio = update_ray(geometry_buffer, ray, d, triangle_id, texture_index);
ray->pdf += texture_pdf(texture_cdf, ray, d, texture_area_ratio, *texture_index);
}
void closest_hit(
const global GeometryBuffer *geometry_buffer,
Ray *ray,
const float d,
uint2 *texture_index) {
update_ray(geometry_buffer, ray, d, ray->ri, texture_index);
ray->r += ray->rd * d;
}
void miss(Ray *ray) {
ray->rn = (float3)(0.0f, 0.0f, 0.0f);
ray->rt = (float2)(0.0f, 0.0f);
ray->ri = -1;
}
float scene_intersect(
const global GeometryBuffer *geometry_buffer,
const global uint *texture_cdf,
Ray *ray,
uint2 *texture_index) {
if (ray->ri == -2)
return 0.f;
ray->hit_cnt = 0;
float d = MAXFLOAT;
int bvh_id = 0;
while (bvh_id != -1) {
int bvh_left = geometry_buffer->bvh_left[bvh_id];
int bvh_right = geometry_buffer->bvh_right[bvh_id];
int bvh_next = geometry_buffer->bvh_next[bvh_id];
bool has_intersect = aabb_intersect(geometry_buffer, ray, bvh_id);
bool bvh_is_leaf = (bvh_right != -1);
if (!has_intersect) {
bvh_id = bvh_next;
continue;
}
if (!bvh_is_leaf) {
bvh_id = bvh_left;
continue;
}
for (int i = bvh_left; i < bvh_right; ++i) {
float new_d = triangle_intersect(geometry_buffer, ray, i);
if (new_d != MAXFLOAT) {
if (new_d < d) {
ray->ri = i;
d = new_d;
}
ray->hit_cnt++;
any_hit(geometry_buffer, texture_cdf, ray, new_d, i, texture_index);
}
}
bvh_id = bvh_next;
}
if (ray->hit_cnt) {
closest_hit(geometry_buffer, ray, d, texture_index);
return d;
}
miss(ray);
return 0.f;
}
Ray ray_init(const int srand, const uint low_discrepancy, const volatile global RenderBuffer *render_buffer) {
Ray ray;
const uint i_pixel = get_global_id(0);
const uint i_sample = get_global_id(1);
if (srand >= 0)
pcg32_init(&ray, srand, i_sample);
else
pcg32_init(&ray, i_pixel + get_global_size(0) * (1 - srand), i_sample);
float2 pixel_coord = (float2)(pcg32_next_float(&ray), pcg32_next_float(&ray));
if (i_sample == 0) {
pixel_coord = (float2)(0.5f, 0.5f);
}
ray.r = (float3)(render_buffer->ray[0][i_pixel],
render_buffer->ray[1][i_pixel],
render_buffer->ray[2][i_pixel]
) + pixel_coord.x * (float3)(render_buffer->ray[3][i_pixel],
render_buffer->ray[4][i_pixel],
render_buffer->ray[5][i_pixel]
) + pixel_coord.y * (float3)(render_buffer->ray[6][i_pixel],
render_buffer->ray[7][i_pixel],
render_buffer->ray[8][i_pixel]);
ray.rd = (float3)(render_buffer->ray[9][i_pixel],
render_buffer->ray[10][i_pixel],
render_buffer->ray[11][i_pixel]
) + pixel_coord.x * (float3)(render_buffer->ray[12][i_pixel],
render_buffer->ray[13][i_pixel],
render_buffer->ray[14][i_pixel]
) + pixel_coord.y * (float3)(render_buffer->ray[15][i_pixel],
render_buffer->ray[16][i_pixel],
render_buffer->ray[17][i_pixel]);
ray.rd /= sqrt(dot(ray.rd, ray.rd));
ray.render = (float3)(0.f, 0.f, 0.f);
ray.render_last = (float3)(0.f, 0.f, 0.f);
ray.render_last_weight = (float3)(1.f, 1.f, 1.f);
ray.ri = 0;
return ray;
}
void material_normal(
Ray *ray,
const volatile global RenderBuffer *render_buffer,
const uint2 texture_index,
const int i_bounce) {
if (!(false && i_bounce == 0) && ray->ri >= 0)
ray->rn = (float3)(render_buffer->texture[0][texture_index.x][texture_index.y],
render_buffer->texture[1][texture_index.x][texture_index.y],
render_buffer->texture[2][texture_index.x][texture_index.y]);
if (dot(ray->rn, ray->rn) < 1e-3f) {
ray->ri = -2;
} else {
ray->rn /= sqrt(dot(ray->rn, ray->rn));
if (dot(ray->rd, ray->rn) > 0)
ray->rn = -ray->rn;
if (dot(ray->rd, ray->rn) > -1e-3f)
ray->ri = -2;
}
}
float3 material_intensity(const volatile global RenderBuffer *render_buffer, const uint2 texture_index, const int i_bounce) {
return (float3)(render_buffer->texture[10][texture_index.x][texture_index.y],
render_buffer->texture[11][texture_index.x][texture_index.y],
render_buffer->texture[12][texture_index.x][texture_index.y]);
}
float material_window(const volatile global RenderBuffer *render_buffer, const uint2 texture_index, const int i_bounce) {
return render_buffer->texture[13][texture_index.x][texture_index.y];
}
float3 material_envmap(const volatile global RenderBuffer *render_buffer, const uint3 envmap_index) {
return (float3)(render_buffer->envmap[0][envmap_index.x][envmap_index.y][envmap_index.z],
render_buffer->envmap[1][envmap_index.x][envmap_index.y][envmap_index.z],
render_buffer->envmap[2][envmap_index.x][envmap_index.y][envmap_index.z]);
}
void d_material_intensity(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
const uint2 texture_index,
const int i_bounce,
const float3 d_intensity) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_texture[10][texture_index.x][texture_index.y][i_bucket], d_intensity.x);
atomic_add_f(&render_buffer->d_texture[11][texture_index.x][texture_index.y][i_bucket], d_intensity.y);
atomic_add_f(&render_buffer->d_texture[12][texture_index.x][texture_index.y][i_bucket], d_intensity.z);
}
void d_material_window(
volatile global RenderBuffer *render_buffer,
local float *operand_gather,
const uint2 texture_index,
const int i_bounce,
const float d_window) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_texture[13][texture_index.x][texture_index.y][i_bucket], d_window);
}
void d_material_envmap(volatile global RenderBuffer *render_buffer, const uint3 envmap_index, const float3 d_envmap) {
const uint i_bucket = get_global_id(1) % 8u;
atomic_add_f(&render_buffer->d_envmap[0][envmap_index.x][envmap_index.y][envmap_index.z][i_bucket], d_envmap.x);
atomic_add_f(&render_buffer->d_envmap[1][envmap_index.x][envmap_index.y][envmap_index.z][i_bucket], d_envmap.y);
atomic_add_f(&render_buffer->d_envmap[2][envmap_index.x][envmap_index.y][envmap_index.z][i_bucket], d_envmap.z);
}
kernel void kernel_ray_tracing_forward(
const int srand,
const uint low_discrepancy,
const global GeometryBuffer *restrict geometry_buffer,
volatile global RenderBuffer *restrict render_buffer,
global IntermediateBuffer *restrict intermediate_buffer) {
local float operand_gather[128u];
const uint i_pixel = get_global_id(0);
const uint i_sample = get_global_id(1);
Ray ray = ray_init(srand, low_discrepancy, render_buffer);
for (int i_bounce = 0; i_bounce <= 4u; ++i_bounce) {
const float clip_near = i_bounce == 0 ? 0.01f : i_bounce == 1 ? 0.0001f : 0.0001f;
float d;
uint2 texture_index;
uint3 envmap_index;
background_intersect(render_buffer, &ray, &envmap_index);
if (false && i_bounce == 0) {
d = render_buffer->ray[23][i_pixel];
if (d <= 0) {
ray.ri = -1;
} else {
ray.r += d * ray.rd;
}
} else {
ray.r += clip_near * ray.rd;
d = scene_intersect(
geometry_buffer,
(const global uint *)render_buffer->texture_cdf,
&ray,
&texture_index);
}
if (i_bounce == 0) {
ray.render_weight = (float3)(1.f, 1.f, 1.f);
ray.pdf = 1.f;
}
if (ray.pdf < 1e-6f || 1.0f / ray.pdf < 1e-6f ||
(ray.render_weight.x < 1e-6f && ray.render_weight.y < 1e-6f && ray.render_weight.z < 1e-6f)) {
ray.ri = -2;
} else {
ray.render_weight /= ray.pdf;
if (i_bounce > 1u)
ray.render_last_weight /= ray.pdf;
}
const float3 envmap = material_envmap(render_buffer, envmap_index);
if (ray.ri == -1) {
ray.r += ray.rd;
ray.rn = ray.rd;
if (i_bounce >= 0u)
ray.render += ray.render_weight * envmap;
}
material_normal(&ray, render_buffer, texture_index, i_bounce);
if (ray.ri == -2) {
ray.r += ray.rd;
ray.pdf = 0.0f;
}
if (i_bounce <= 1u && i_sample < 128u) {
intermediate_buffer->ray_r[0][i_bounce][i_pixel][i_sample] = ray.r.x;
intermediate_buffer->ray_r[1][i_bounce][i_pixel][i_sample] = ray.r.y;
intermediate_buffer->ray_r[2][i_bounce][i_pixel][i_sample] = ray.r.z;
intermediate_buffer->ray_ri[i_bounce][i_pixel][i_sample] = ray.ri;
intermediate_buffer->ray_pdf[i_bounce][i_pixel][i_sample] = ray.pdf;
}
if (ray.ri < 0)
break;
if (i_bounce == 0 && i_sample == 0) {
render_buffer->ray[21][i_pixel] = ray.rt.x;
render_buffer->ray[22][i_pixel] = ray.rt.y;
render_buffer->ray[23][i_pixel] = clip_near + d;
render_buffer->ray[24][i_pixel] = ray.rn.x;
render_buffer->ray[25][i_pixel] = ray.rn.y;
render_buffer->ray[26][i_pixel] = ray.rn.z;
}
if (i_bounce >= 0u) {
const float3 intensity = material_intensity(render_buffer, texture_index, i_bounce);
const float window = material_window(render_buffer, texture_index, i_bounce);
ray.render += ray.render_weight * (intensity + window * envmap);
if (i_bounce > 1u)
ray.render_last += ray.render_last_weight * (intensity + window * envmap);
}
const float3 next_rd = sample_reflection(
geometry_buffer,
render_buffer,
&ray,
texture_index,
i_bounce,
low_discrepancy);
const float3 brdf = diffuse_brdf(render_buffer, &ray, next_rd, texture_index, i_bounce) +
specular_brdf(render_buffer, &ray, next_rd, texture_index, i_bounce);
ray.pdf = diffuse_pdf(&ray, next_rd) + specular_pdf(render_buffer, &ray, next_rd, texture_index, i_bounce);
ray.rd = next_rd;
ray.render_weight *= brdf;
if (i_bounce >= 1u)
ray.render_last_weight *= brdf;
}
if (i_sample < 128u) {
intermediate_buffer->ray_render_last[0][i_pixel][i_sample] = ray.render_last.x;
intermediate_buffer->ray_render_last[1][i_pixel][i_sample] = ray.render_last.y;
intermediate_buffer->ray_render_last[2][i_pixel][i_sample] = ray.render_last.z;
}
local_gather_atomic_add_f(&render_buffer->ray[18][i_pixel], operand_gather, ray.render.x / 1024u);
local_gather_atomic_add_f(&render_buffer->ray[19][i_pixel], operand_gather, ray.render.y / 1024u);
local_gather_atomic_add_f(&render_buffer->ray[20][i_pixel], operand_gather, ray.render.z / 1024u);
}
kernel void kernel_ray_tracing_backward(
const int srand,
const uint low_discrepancy,
const global GeometryBuffer *restrict geometry_buffer,
volatile global RenderBuffer *restrict render_buffer,
const global IntermediateBuffer *restrict intermediate_buffer) {
local float operand_gather[128u];
const uint i_pixel = get_global_id(0);
const uint i_sample = get_global_id(1);
float3 chain_render_weight[1u + 1];
float3 chain_render[1u + 1];
chain_render_weight[0] = (float3)(1.f, 1.f, 1.f);
Ray ray = ray_init(srand, low_discrepancy, render_buffer);
uint2 texture_index = (uint2)(0u, 0u);
uint3 envmap_index = (uint3)(0u, 0u, 0u);
for (int i_bounce = 0; i_bounce <= 1u; ++i_bounce) {
if (ray.ri < 0) {
chain_render_weight[i_bounce] = (float3)(0.f, 0.f, 0.f);
chain_render[i_bounce] = (float3)(0.f, 0.f, 0.f);
continue;
}
const float3 next_r = (float3)(intermediate_buffer->ray_r[0][i_bounce][i_pixel][i_sample],
intermediate_buffer->ray_r[1][i_bounce][i_pixel][i_sample],
intermediate_buffer->ray_r[2][i_bounce][i_pixel][i_sample]);
float3 curr_rd = next_r - ray.r;
curr_rd /= sqrt(dot(curr_rd, curr_rd));
ray.ri = intermediate_buffer->ray_ri[i_bounce][i_pixel][i_sample];
ray.pdf = intermediate_buffer->ray_pdf[i_bounce][i_pixel][i_sample];
if (ray.ri == -2) {
chain_render_weight[i_bounce] = (float3)(0.f, 0.f, 0.f);
chain_render[i_bounce] = (float3)(0.f, 0.f, 0.f);
continue;
}
if (i_bounce > 0) {
const float3 brdf = diffuse_brdf(render_buffer, &ray, curr_rd, texture_index, i_bounce - 1) +
specular_brdf(render_buffer, &ray, curr_rd, texture_index, i_bounce - 1);
chain_render_weight[i_bounce] = brdf / ray.pdf;
}
ray.rd = curr_rd;
ray.r = next_r;
background_intersect(render_buffer, &ray, &envmap_index);
const float3 envmap = material_envmap(render_buffer, envmap_index);
if (ray.ri == -1) {
if (i_bounce >= 0u) {
chain_render[i_bounce] = envmap;
} else {
chain_render[i_bounce] = (float3)(0.f, 0.f, 0.f);
}
continue;
}
if (ray.ri < 0) {
chain_render_weight[i_bounce] = (float3)(0.f, 0.f, 0.f);
chain_render[i_bounce] = (float3)(0.f, 0.f, 0.f);
continue;
}
if (!(false && i_bounce == 0))
update_ray(geometry_buffer, &ray, 0.f, ray.ri, &texture_index);
material_normal(&ray, render_buffer, texture_index, i_bounce);
if (i_bounce >= 0u) {
const float3 intensity = material_intensity(render_buffer, texture_index, i_bounce);
const float window = material_window(render_buffer, texture_index, i_bounce);
chain_render[i_bounce] = intensity + window * envmap;
} else {
chain_render[i_bounce] = (float3)(0.f, 0.f, 0.f);
}
}
chain_render[1u] += (float3)(intermediate_buffer->ray_render_last[0][i_pixel][i_sample],
intermediate_buffer->ray_render_last[1][i_pixel][i_sample],
intermediate_buffer->ray_render_last[2][i_pixel][i_sample]);
for (int i_bounce = 1u - 1; i_bounce >= 0; --i_bounce)
chain_render[i_bounce] += chain_render[i_bounce + 1] * chain_render_weight[i_bounce + 1];
ray = ray_init(srand, low_discrepancy, render_buffer);
float3 d_render = (float3)(render_buffer->ray[18][i_pixel],
render_buffer->ray[19][i_pixel],
render_buffer->ray[20][i_pixel]) / (float)128u;
for (int i_bounce = 0; i_bounce <= 1u; ++i_bounce) {
bool is_continue = false;
if (ray.ri < 0) {
d_render = (float3)(0.f, 0.f, 0.f);
is_continue = true;
goto d_continue_before_brdf;
}
const float3 next_r = (float3)(intermediate_buffer->ray_r[0][i_bounce][i_pixel][i_sample],
intermediate_buffer->ray_r[1][i_bounce][i_pixel][i_sample],
intermediate_buffer->ray_r[2][i_bounce][i_pixel][i_sample]);
float3 curr_rd = next_r - ray.r;
curr_rd /= sqrt(dot(curr_rd, curr_rd));
ray.ri = intermediate_buffer->ray_ri[i_bounce][i_pixel][i_sample];
ray.pdf = intermediate_buffer->ray_pdf[i_bounce][i_pixel][i_sample];
float3 envmap = (float3)(0.f, 0.f, 0.f);
if (ray.ri == -2) {
d_render = (float3)(0.f, 0.f, 0.f);
is_continue = true;
goto d_continue_before_brdf;
}
d_continue_before_brdf:
if (i_bounce > 0) {
const float3 d_brdf = is_continue ? (float3)(0.f, 0.f, 0.f) : d_render * chain_render[i_bounce] / ray.pdf;
d_diffuse_brdf(render_buffer, operand_gather, &ray, curr_rd, texture_index, i_bounce - 1, d_brdf);
d_specular_brdf(render_buffer, operand_gather, &ray, curr_rd, texture_index, i_bounce - 1, d_brdf);
d_render *= chain_render_weight[i_bounce];
}
if (is_continue)
goto d_continue_after_brdf;
ray.rd = curr_rd;
ray.r = next_r;
background_intersect(render_buffer, &ray, &envmap_index);
envmap = material_envmap(render_buffer, envmap_index);
if (ray.ri == -1 && i_bounce >= 0u)
d_material_envmap(render_buffer, envmap_index, d_render);
if (ray.ri < 0) {
d_render = (float3)(0.f, 0.f, 0.f);
goto d_continue_after_brdf;
}
if (!(false && i_bounce == 0))
update_ray(geometry_buffer, &ray, 0.f, ray.ri, &texture_index);
material_normal(&ray, render_buffer, texture_index, i_bounce);
const float window = material_window(render_buffer, texture_index, i_bounce);
if (i_bounce >= 0u)
d_material_envmap(render_buffer, envmap_index, window * d_render);
d_continue_after_brdf:
if (i_bounce >= 0u) {
d_material_intensity(render_buffer, operand_gather, texture_index, i_bounce, d_render);
d_material_window(render_buffer, operand_gather, texture_index, i_bounce, dot(d_render, envmap));
}
}
}
void collect_texture(volatile global float (*data)[256u][256u][8u]) {
volatile global float (*data_offset)[8u] = &(*data)[get_global_id(0)][get_global_id(1)];
for (int i = 1; i < 8u; ++i)
(*data_offset)[0] += (*data_offset)[i];
}
kernel void kernel_ray_tracing_backward_collect_texture(volatile global RenderBuffer *render_buffer) {
for (int i = 0; i < 14; ++i)
collect_texture(&render_buffer->d_texture[i]);
}
void collect_envmap(volatile global float (*data)[512u][512u][8u]) {
volatile global float (*data_offset)[8u] = &(*data)[get_global_id(0)][get_global_id(1)];
for (int i = 1; i < 8u; ++i)
(*data_offset)[0] += (*data_offset)[i];
}
kernel void kernel_ray_tracing_backward_collect_envmap(volatile global RenderBuffer *render_buffer) {
for (int i = 0; i < 3; ++i)
for (int j = 0; j < 6; ++j)
collect_envmap(&render_buffer->d_envmap[i][j]);
}