| use std::borrow::Cow; |
| |
| use bytemuck::{Pod, Zeroable}; |
| use surpass::Lines; |
| use wgpu::util::DeviceExt; |
| |
| #[derive(Debug)] |
| pub struct RasterizerContext { |
| prepare_lines_pipeline: wgpu::ComputePipeline, |
| prepare_lines_layout: wgpu::BindGroupLayout, |
| rasterizer_pipeline: wgpu::ComputePipeline, |
| rasterizer_layout: wgpu::BindGroupLayout, |
| } |
| |
| pub fn init(device: &wgpu::Device) -> RasterizerContext { |
| let prepare_lines_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { |
| label: Some("prepare_lines.wgsl"), |
| source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("prepare_lines.wgsl"))), |
| }); |
| let prepare_lines_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { |
| label: Some("prepareLines"), |
| layout: None, |
| module: &prepare_lines_module, |
| entry_point: "prepareLines", |
| }); |
| let prepare_lines_layout = prepare_lines_pipeline.get_bind_group_layout(0); |
| |
| let rasterizer_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { |
| label: Some("rasterizer.wgsl"), |
| source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("rasterizer.wgsl"))), |
| }); |
| let rasterizer_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { |
| label: Some("rasterize"), |
| layout: None, |
| module: &rasterizer_module, |
| entry_point: "rasterize", |
| }); |
| let rasterizer_layout = rasterizer_pipeline.get_bind_group_layout(0); |
| |
| RasterizerContext { |
| prepare_lines_pipeline, |
| prepare_lines_layout, |
| rasterizer_pipeline, |
| rasterizer_layout, |
| } |
| } |
| |
| #[derive(Copy, Clone, Debug, Pod, Zeroable)] |
| #[repr(C)] |
| pub struct Config { |
| pub lines_count: u32, |
| pub segments_count: u32, |
| pub segments_padded_count: u32, |
| } |
| |
| impl Config { |
| pub fn as_byte_slice(&self) -> &[u8] { |
| bytemuck::bytes_of(self) |
| } |
| } |
| |
| fn div_ceil(a: u32, b: u32) -> u32 { |
| (a + b - 1) / b |
| } |
| |
| pub fn encode( |
| device: &wgpu::Device, |
| encoder: &mut wgpu::CommandEncoder, |
| context: &RasterizerContext, |
| lines: &Lines, |
| segments_padded_count: usize, |
| segments_buffer: &wgpu::Buffer, |
| timestamp: Option<(&wgpu::QuerySet, u32, u32)>, |
| ) { |
| let config = Config { |
| lines_count: lines.lines_count() as u32, |
| segments_count: lines.segments_count() as u32, |
| segments_padded_count: segments_padded_count as u32, |
| }; |
| let config_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { |
| label: None, |
| contents: config.as_byte_slice(), |
| usage: wgpu::BufferUsages::UNIFORM, |
| }); |
| |
| let points_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { |
| label: Some("points"), |
| contents: bytemuck::cast_slice(&lines.points), |
| usage: wgpu::BufferUsages::STORAGE, |
| }); |
| |
| let orders_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { |
| label: Some("orders"), |
| contents: bytemuck::cast_slice(&lines.orders), |
| usage: wgpu::BufferUsages::STORAGE, |
| }); |
| |
| // The struct prepare_lines.wgsl:Lines has 9 x 32 bits fields. |
| let lines_buffer = device.create_buffer(&wgpu::BufferDescriptor { |
| label: Some("lines"), |
| usage: wgpu::BufferUsages::STORAGE, |
| size: (config.lines_count * 9 * 4) as wgpu::BufferAddress, |
| mapped_at_creation: false, |
| }); |
| |
| let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { |
| label: None, |
| layout: &context.prepare_lines_layout, |
| entries: &[ |
| wgpu::BindGroupEntry { binding: 0, resource: config_buffer.as_entire_binding() }, |
| wgpu::BindGroupEntry { binding: 1, resource: points_buffer.as_entire_binding() }, |
| wgpu::BindGroupEntry { binding: 2, resource: orders_buffer.as_entire_binding() }, |
| wgpu::BindGroupEntry { binding: 3, resource: lines_buffer.as_entire_binding() }, |
| ], |
| }); |
| |
| { |
| let mut pass = encoder |
| .begin_compute_pass(&wgpu::ComputePassDescriptor { label: Some("prepare_lines") }); |
| pass.set_pipeline(&context.prepare_lines_pipeline); |
| pass.set_bind_group(0, &bind_group, &[]); |
| if let Some((timestamp, start_index, _)) = timestamp { |
| pass.write_timestamp(timestamp, start_index); |
| } |
| |
| let workgroup_size: u32 = 128; |
| let max_invocation_count: u32 = 16; |
| pass.dispatch_workgroups( |
| div_ceil(config.lines_count, workgroup_size).min(max_invocation_count), |
| 1, |
| 1, |
| ); |
| if let Some((timestamp, _, end_index)) = timestamp { |
| pass.write_timestamp(timestamp, end_index); |
| } |
| } |
| |
| let lines_lenghts_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { |
| label: Some("line_lenghts"), |
| contents: bytemuck::cast_slice(&lines.lengths), |
| usage: wgpu::BufferUsages::STORAGE, |
| }); |
| |
| let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { |
| label: None, |
| layout: &context.rasterizer_layout, |
| entries: &[ |
| wgpu::BindGroupEntry { binding: 0, resource: config_buffer.as_entire_binding() }, |
| wgpu::BindGroupEntry { binding: 1, resource: lines_buffer.as_entire_binding() }, |
| wgpu::BindGroupEntry { binding: 2, resource: lines_lenghts_buffer.as_entire_binding() }, |
| wgpu::BindGroupEntry { binding: 3, resource: segments_buffer.as_entire_binding() }, |
| ], |
| }); |
| |
| { |
| let mut pass = |
| encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: Some("rasterize") }); |
| pass.set_pipeline(&context.rasterizer_pipeline); |
| pass.set_bind_group(0, &bind_group, &[]); |
| if let Some((timestamp, start_index, _)) = timestamp { |
| pass.write_timestamp(timestamp, start_index); |
| } |
| let workgroup_size: u32 = 128; |
| let max_invocation_count: u32 = 16; |
| pass.dispatch_workgroups( |
| div_ceil(segments_padded_count as u32, workgroup_size).min(max_invocation_count), |
| 1, |
| 1, |
| ); |
| if let Some((timestamp, _, end_index)) = timestamp { |
| pass.write_timestamp(timestamp, end_index); |
| } |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use std::collections::HashMap; |
| |
| use rand::{distributions::Uniform, prelude::SmallRng, Rng, SeedableRng}; |
| use surpass::{ |
| rasterizer::{PixelSegment, Rasterizer}, |
| GeomId, Layer, Lines, LinesBuilder, Order, PathBuilder, Point, |
| }; |
| const TW: usize = 16; |
| const TH: usize = 4; |
| |
| fn run_rasterizer(lines: &Lines) -> Vec<PixelSegment<TW, TH>> { |
| let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY); |
| let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { |
| power_preference: wgpu::PowerPreference::HighPerformance, |
| ..Default::default() |
| })) |
| .unwrap(); |
| |
| let (device, queue) = |
| pollster::block_on(adapter.request_device(&Default::default(), None)).unwrap(); |
| let mut encoder = device.create_command_encoder(&Default::default()); |
| |
| let context = crate::init(&device); |
| |
| let segments_padded_count = lines.segments_count(); |
| let size = (segments_padded_count * std::mem::size_of::<u64>()) as wgpu::BufferAddress; |
| |
| let segments_buffer = device.create_buffer(&wgpu::BufferDescriptor { |
| label: None, |
| size, |
| usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, |
| mapped_at_creation: false, |
| }); |
| let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { |
| label: None, |
| size, |
| usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, |
| mapped_at_creation: false, |
| }); |
| |
| crate::encode( |
| &device, |
| &mut encoder, |
| &context, |
| &lines, |
| segments_padded_count, |
| &segments_buffer, |
| None, |
| ); |
| |
| encoder.copy_buffer_to_buffer(&segments_buffer, 0, &staging_buffer, 0, size); |
| |
| queue.submit(Some(encoder.finish())); |
| device.poll(wgpu::Maintain::Wait); |
| |
| let mut actual_segments = { |
| let buffer_slice = staging_buffer.slice(..); |
| |
| buffer_slice.map_async(wgpu::MapMode::Read, |_| ()); |
| device.poll(wgpu::Maintain::Wait); |
| |
| let range = buffer_slice.get_mapped_range(); |
| let (before, actual_segments, after) = |
| unsafe { range.align_to::<PixelSegment<TW, TH>>() }; |
| assert!(before.is_empty()); |
| assert!(after.is_empty()); |
| actual_segments.to_vec() |
| }; |
| |
| staging_buffer.unmap(); |
| actual_segments.sort(); |
| |
| return actual_segments; |
| } |
| |
| #[test] |
| fn rasterize_triangle() { |
| let mut lines_builder = LinesBuilder::new(); |
| let path = PathBuilder::new() |
| .move_to(Point { x: 1.0, y: 1.0 }) |
| .line_to(Point { x: 1.0, y: 4.0 }) |
| .line_to(Point { x: 2.0, y: 4.0 }) |
| .build(); |
| let geom_id = GeomId::default(); |
| lines_builder.push_path(geom_id, &path); |
| let lines = lines_builder.build_for_gpu(|_| { |
| Some(Layer { order: Some(Order::new(1).unwrap()), ..Default::default() }) |
| }); |
| let actual_segments = run_rasterizer(&lines); |
| let expected_segments = [ |
| PixelSegment::new(1, 0, 0, 1, 1, 27, -16), |
| PixelSegment::new(1, 0, 0, 1, 1, 32, 16), |
| PixelSegment::new(1, 0, 0, 1, 2, 16, -16), |
| PixelSegment::new(1, 0, 0, 1, 2, 32, 16), |
| PixelSegment::new(1, 0, 0, 1, 3, 5, -16), |
| PixelSegment::new(1, 0, 0, 1, 3, 32, 16), |
| ] |
| .to_vec(); |
| assert_eq!(expected_segments, actual_segments); |
| } |
| |
| #[test] |
| fn rasterize_random_quad() { |
| let mut lines_builder = LinesBuilder::new(); |
| let mut rng = SmallRng::seed_from_u64(0); |
| // Using a small range to workaround the precision difference between CPU and GPU |
| // that sometimes lead to rounding differences. |
| let coord_range = Uniform::new(-8i32, 16); |
| let mut rnd_point = |
| || Point { x: rng.sample(coord_range) as f32, y: rng.sample(coord_range) as f32 }; |
| let mut geom_id = GeomId::default(); |
| let mut geom_id_to_order = HashMap::new(); |
| let mut order = 0; |
| |
| for _ in 0..4096 { |
| let path = PathBuilder::new() |
| .move_to(rnd_point()) |
| .line_to(rnd_point()) |
| .line_to(rnd_point()) |
| .line_to(rnd_point()) |
| .build(); |
| |
| lines_builder.push_path(geom_id, &path); |
| geom_id_to_order.insert(geom_id, Order::new(order).unwrap()); |
| geom_id = geom_id.next(); |
| order += 1; |
| } |
| |
| let layers = |geom_id| { |
| Some(Layer { order: geom_id_to_order.get(&geom_id).copied(), ..Default::default() }) |
| }; |
| |
| let gpu_segments = { |
| let lines = lines_builder.build_for_gpu(layers); |
| let segments = run_rasterizer(&lines); |
| lines_builder = lines.unwrap(); |
| segments |
| }; |
| |
| let mut cpu_segments = { |
| let lines = lines_builder.build(layers); |
| let mut rasterizer = Rasterizer::<TW, TH>::default(); |
| rasterizer.rasterize(&lines); |
| rasterizer.segments().to_vec() |
| }; |
| |
| cpu_segments.sort(); |
| cpu_segments.iter().zip(gpu_segments.iter()).enumerate().for_each(|(i, (c, g))| { |
| assert_eq!(c, g, "Segment {}", i); |
| }); |
| } |
| } |