blob: 6e3fd92343f37fd4b19cfc0cfa264c8d6bbc5812 [file] [log] [blame]
//==============================================================================================
// Originally written in 2016 by Peter Shirley <ptrshrl@gmail.com>
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy (see file COPYING.txt) of the CC0 Public
// Domain Dedication along with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
//
// The original source code is from
// https://github.com/RayTracing/raytracing.github.io/tree/release/src/InOneWeekend
//
// Changes to the original code follows the following license.
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//==============================================================================================
#include "hip/hip_runtime.h"
#include "rtweekend.h"
#include "DeviceArray.h"
#include "PPMImageFile.h"
#include "camera.h"
#include "color.h"
#include "hittable_list.h"
#include "material.h"
#include "sphere.h"
#include <chrono>
#define BLKDIM_X 16
#define BLKDIM_Y 16
__device__ hittable_list *dev_world;
__device__ camera *dev_cam;
__host__ __device__ void init(hittable_list **pWorld, camera **pCam) {
*pWorld = new hittable_list;
hittable_list &world = **pWorld;
auto ground_material = makeShared<lambertian>(color(0.5, 0.5, 0.5));
world.add(makeShared<sphere>(point3(0, -1000, 0), 1000, ground_material));
unsigned rnd = 0;
for (int a = -11; a < 11; a++) {
for (int b = -11; b < 11; b++) {
auto choose_mat = random_double(rnd);
point3 center(a + 0.9 * random_double(rnd), 0.2,
b + 0.9 * random_double(rnd));
if ((center - point3(4, 0.2, 0)).length() > 0.9) {
SharedPtr<material> sphere_material;
if (choose_mat < 0.8) {
// diffuse
auto albedo = color::random(rnd) * color::random(rnd);
sphere_material = makeShared<lambertian>(albedo);
world.add(makeShared<sphere>(center, 0.2, sphere_material));
} else if (choose_mat < 0.95) {
// metal
auto albedo = color::random(0.5, 1, rnd);
auto fuzz = random_double(0, 0.5, rnd);
sphere_material = makeShared<metal>(albedo, fuzz);
world.add(makeShared<sphere>(center, 0.2, sphere_material));
} else {
// glass
sphere_material = makeShared<dielectric>(1.5);
world.add(makeShared<sphere>(center, 0.2, sphere_material));
}
}
}
}
auto material1 = makeShared<dielectric>(1.5);
world.add(makeShared<sphere>(point3(0, 1, 0), 1.0, material1));
auto material2 = makeShared<lambertian>(color(0.4, 0.2, 0.1));
world.add(makeShared<sphere>(point3(-4, 1, 0), 1.0, material2));
auto material3 = makeShared<metal>(color(0.7, 0.6, 0.5), 0.0);
world.add(makeShared<sphere>(point3(4, 1, 0), 1.0, material3));
*pCam = new camera;
camera &cam = **pCam;
cam.aspect_ratio = 16.0 / 9.0;
cam.image_width = 1200;
cam.samples_per_pixel = 10;
cam.max_depth = 20;
cam.vfov = 20;
cam.lookfrom = point3(13, 2, 3);
cam.lookat = point3(0, 0, 0);
cam.vup = vec3(0, 1, 0);
cam.defocus_angle = 0.6;
cam.focus_dist = 10.0;
cam.initialize();
}
__global__ void initKernel() { init(&dev_world, &dev_cam); }
__launch_bounds__(BLKDIM_X *BLKDIM_Y) __global__
void renderKernel(color *image) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i < dev_cam->image_width && j < dev_cam->image_height)
dev_cam->renderOnePixel(i, j, *dev_world, image);
}
int main(int argc, char *argv[]) {
bool output_time = false;
bool compare_cpu = false;
// Process command line arguments
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-t") == 0) {
output_time = true;
} else if (strcmp(argv[i], "-c") == 0) {
compare_cpu = true;
}
}
hittable_list *world;
camera *cam;
init(&world, &cam);
const int grid_x = std::ceil((float)cam->image_width / BLKDIM_X);
const int grid_y = std::ceil((float)cam->image_height / BLKDIM_Y);
printf("image width = %d height = %d\n", cam->image_width, cam->image_height);
printf("block size = (%d, %d) grid size = (%d, %d)\n", BLKDIM_X, BLKDIM_Y,
grid_x, grid_y);
PPMImageFile ref_image("ref.ppm");
ref_image.load();
// Render by CPU.
PPMImageFile cpu_image("cpu.ppm", cam->image_width, cam->image_height);
std::chrono::duration<double, std::milli> cpu_duration;
if (compare_cpu) {
printf("Start rendering by CPU.\n");
auto start_cpu = std::chrono::high_resolution_clock::now();
cam->render(*world, cpu_image.getHostPtr());
auto end_cpu = std::chrono::high_resolution_clock::now();
cpu_duration = end_cpu - start_cpu;
cpu_image.normalize();
cpu_image.save();
cpu_image.compare(ref_image);
}
PPMImageFile gpu_image("gpu.ppm", cam->image_width, cam->image_height);
DeviceArray<color> gpu_image_data(cam->image_width * cam->image_height);
// Need to set stack size since there is recursive function.
checkHIP(hipDeviceSetLimit(hipLimitStackSize, 8192));
initKernel<<<1, 1>>>();
checkHIP(hipDeviceSynchronize());
// Render by GPU and measure time.
printf("Start rendering by GPU.\n");
hipEvent_t start_gpu, stop_gpu;
checkHIP(hipEventCreate(&start_gpu));
checkHIP(hipEventCreate(&stop_gpu));
checkHIP(hipEventRecord(start_gpu));
renderKernel<<<dim3(grid_x, grid_y), dim3(BLKDIM_X, BLKDIM_Y)>>>(
gpu_image_data.getDevicePtr());
checkHIP(hipEventRecord(stop_gpu));
checkHIP(hipEventSynchronize(stop_gpu));
float gpu_duration_ms = 0;
checkHIP(hipEventElapsedTime(&gpu_duration_ms, start_gpu, stop_gpu));
checkHIP(hipEventDestroy(start_gpu));
checkHIP(hipEventDestroy(stop_gpu));
printf("Done.\n");
checkHIP(hipDeviceSynchronize());
gpu_image_data.toHost();
checkHIP(hipDeviceSynchronize());
gpu_image.setData(gpu_image_data.getHostPtr());
gpu_image.normalize();
gpu_image.save();
if (compare_cpu)
gpu_image.compare(cpu_image);
gpu_image.compare(ref_image);
// Conditionally output timing information
if (output_time) {
int total_pixels = cam->image_width * cam->image_height;
if (compare_cpu) {
double cpu_time_per_pixel = cpu_duration.count() / total_pixels;
printf("CPU Time per Pixel: %f ms\n", cpu_time_per_pixel);
}
double gpu_time_per_pixel = gpu_duration_ms / total_pixels;
printf("GPU Time per Pixel: %f ms\n", gpu_time_per_pixel);
}
}