ZIm/crates/gpui/src/platform/mac/metal_renderer.rs
Sunli 4fdda8d5a1
gpui: Improve path rendering & global multisample anti-aliasing (#29718)
Previously, path rendering required creating a texture for each path,
which wasted a large amount of video memory. In our application, simply
drawing a few charts pushed video memory usage as high as 5 GB.

I removed the step of creating path textures and now draw paths directly
on the render target, adding global multisample anti-aliasing that is
resolved at the end of the render pass. Drawing paths no longer requires
allocating any additional video memory, and it also improves the
performance of path rendering.

Release Notes:

- N/A

---------

Co-authored-by: Jason Lee <huacnlee@gmail.com>
2025-07-02 09:41:42 -07:00
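
The core of the change is to render into a multisampled color texture that is
resolved into the drawable at the end of the render pass, rather than
compositing per-path textures. A minimal sketch of that setup is below
(assuming `device`, `layer`, `drawable`, and `sample_count` are already in
hand); the actual configuration lives in `draw_primitives` in the file that
follows.

    let msaa_texture = create_msaa_texture(&device, &layer, sample_count);
    let descriptor = metal::RenderPassDescriptor::new();
    let color = descriptor.color_attachments().object_at(0).unwrap();
    color.set_load_action(metal::MTLLoadAction::Clear);
    if let Some(msaa) = msaa_texture.as_deref() {
        // Render into the multisampled texture, then resolve into the drawable.
        color.set_texture(Some(msaa));
        color.set_store_action(metal::MTLStoreAction::MultisampleResolve);
        color.set_resolve_texture(Some(drawable.texture()));
    } else {
        // No usable multisample support: render straight into the drawable.
        color.set_texture(Some(drawable.texture()));
        color.set_store_action(metal::MTLStoreAction::Store);
    }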

use super::metal_atlas::MetalAtlas;
use crate::{
AtlasTextureId, Background, Bounds, ContentMask, DevicePixels, MonochromeSprite, PaintSurface,
Path, PathVertex, PolychromeSprite, PrimitiveBatch, Quad, ScaledPixels, Scene, Shadow, Size,
Surface, Underline, point, size,
};
use anyhow::Result;
use block::ConcreteBlock;
use cocoa::{
base::{NO, YES},
foundation::{NSSize, NSUInteger},
quartzcore::AutoresizingMask,
};
use core_foundation::base::TCFType;
use core_video::{
metal_texture::CVMetalTextureGetTexture, metal_texture_cache::CVMetalTextureCache,
pixel_buffer::kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
};
use foreign_types::{ForeignType, ForeignTypeRef};
use metal::{
CAMetalLayer, CommandQueue, MTLDrawPrimitivesIndirectArguments, MTLPixelFormat,
MTLResourceOptions, NSRange,
};
use objc::{self, msg_send, sel, sel_impl};
use parking_lot::Mutex;
use std::{cell::Cell, ffi::c_void, mem, ptr, sync::Arc};
// Exported to Metal
pub(crate) type PointF = crate::Point<f32>;
#[cfg(not(feature = "runtime_shaders"))]
const SHADERS_METALLIB: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/shaders.metallib"));
#[cfg(feature = "runtime_shaders")]
const SHADERS_SOURCE_FILE: &str = include_str!(concat!(env!("OUT_DIR"), "/stitched_shaders.metal"));
pub type Context = Arc<Mutex<InstanceBufferPool>>;
pub type Renderer = MetalRenderer;
pub unsafe fn new_renderer(
context: self::Context,
_native_window: *mut c_void,
_native_view: *mut c_void,
_bounds: crate::Size<f32>,
_transparent: bool,
) -> Renderer {
MetalRenderer::new(context)
}
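// Pool of reusable Metal buffers that hold per-frame instance data. A buffer
// is acquired for each frame and returned to the pool from the command
// buffer's completion handler; buffers whose size no longer matches the
// pool's current `buffer_size` are dropped rather than reused.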
pub(crate) struct InstanceBufferPool {
buffer_size: usize,
buffers: Vec<metal::Buffer>,
}
impl Default for InstanceBufferPool {
fn default() -> Self {
Self {
buffer_size: 2 * 1024 * 1024,
buffers: Vec::new(),
}
}
}
pub(crate) struct InstanceBuffer {
metal_buffer: metal::Buffer,
size: usize,
}
impl InstanceBufferPool {
pub(crate) fn reset(&mut self, buffer_size: usize) {
self.buffer_size = buffer_size;
self.buffers.clear();
}
pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
let buffer = self.buffers.pop().unwrap_or_else(|| {
device.new_buffer(
self.buffer_size as u64,
MTLResourceOptions::StorageModeManaged,
)
});
InstanceBuffer {
metal_buffer: buffer,
size: self.buffer_size,
}
}
pub(crate) fn release(&mut self, buffer: InstanceBuffer) {
if buffer.size == self.buffer_size {
self.buffers.push(buffer.metal_buffer)
}
}
}
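// Metal-backed renderer for a window's `CAMetalLayer`. Primitives are encoded
// into pooled instance buffers and drawn directly into the drawable (through
// an optional multisampled texture that is resolved at the end of the render
// pass), so no per-path textures are allocated.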
pub(crate) struct MetalRenderer {
device: metal::Device,
layer: metal::MetalLayer,
presents_with_transaction: bool,
command_queue: CommandQueue,
path_pipeline_state: metal::RenderPipelineState,
shadows_pipeline_state: metal::RenderPipelineState,
quads_pipeline_state: metal::RenderPipelineState,
underlines_pipeline_state: metal::RenderPipelineState,
monochrome_sprites_pipeline_state: metal::RenderPipelineState,
polychrome_sprites_pipeline_state: metal::RenderPipelineState,
surfaces_pipeline_state: metal::RenderPipelineState,
unit_vertices: metal::Buffer,
#[allow(clippy::arc_with_non_send_sync)]
instance_buffer_pool: Arc<Mutex<InstanceBufferPool>>,
sprite_atlas: Arc<MetalAtlas>,
core_video_texture_cache: core_video::metal_texture_cache::CVMetalTextureCache,
sample_count: u64,
msaa_texture: Option<metal::Texture>,
}
impl MetalRenderer {
pub fn new(instance_buffer_pool: Arc<Mutex<InstanceBufferPool>>) -> Self {
// Prefer low-power integrated GPUs on Intel Macs. On Apple
// Silicon there is only ever one GPU, so this is equivalent to
// `metal::Device::system_default()`.
let mut devices = metal::Device::all();
devices.sort_by_key(|device| (device.is_removable(), device.is_low_power()));
let Some(device) = devices.pop() else {
log::error!("unable to access a compatible graphics device");
std::process::exit(1);
};
let layer = metal::MetalLayer::new();
layer.set_device(&device);
layer.set_pixel_format(MTLPixelFormat::BGRA8Unorm);
layer.set_opaque(false);
layer.set_maximum_drawable_count(3);
unsafe {
let _: () = msg_send![&*layer, setAllowsNextDrawableTimeout: NO];
let _: () = msg_send![&*layer, setNeedsDisplayOnBoundsChange: YES];
let _: () = msg_send![
&*layer,
setAutoresizingMask: AutoresizingMask::WIDTH_SIZABLE
| AutoresizingMask::HEIGHT_SIZABLE
];
}
#[cfg(feature = "runtime_shaders")]
let library = device
.new_library_with_source(&SHADERS_SOURCE_FILE, &metal::CompileOptions::new())
.expect("error building metal library");
#[cfg(not(feature = "runtime_shaders"))]
let library = device
.new_library_with_data(SHADERS_METALLIB)
.expect("error building metal library");
fn to_float2_bits(point: PointF) -> u64 {
let mut output = point.y.to_bits() as u64;
output <<= 32;
output |= point.x.to_bits() as u64;
output
}
let unit_vertices = [
to_float2_bits(point(0., 0.)),
to_float2_bits(point(1., 0.)),
to_float2_bits(point(0., 1.)),
to_float2_bits(point(0., 1.)),
to_float2_bits(point(1., 0.)),
to_float2_bits(point(1., 1.)),
];
let unit_vertices = device.new_buffer_with_data(
unit_vertices.as_ptr() as *const c_void,
mem::size_of_val(&unit_vertices) as u64,
MTLResourceOptions::StorageModeManaged,
);
let sample_count = [4, 2, 1]
.into_iter()
.find(|count| device.supports_texture_sample_count(*count))
.unwrap_or(1);
let path_pipeline_state = build_pipeline_state(
&device,
&library,
"paths",
"path_vertex",
"path_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let shadows_pipeline_state = build_pipeline_state(
&device,
&library,
"shadows",
"shadow_vertex",
"shadow_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let quads_pipeline_state = build_pipeline_state(
&device,
&library,
"quads",
"quad_vertex",
"quad_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let underlines_pipeline_state = build_pipeline_state(
&device,
&library,
"underlines",
"underline_vertex",
"underline_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let monochrome_sprites_pipeline_state = build_pipeline_state(
&device,
&library,
"monochrome_sprites",
"monochrome_sprite_vertex",
"monochrome_sprite_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let polychrome_sprites_pipeline_state = build_pipeline_state(
&device,
&library,
"polychrome_sprites",
"polychrome_sprite_vertex",
"polychrome_sprite_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let surfaces_pipeline_state = build_pipeline_state(
&device,
&library,
"surfaces",
"surface_vertex",
"surface_fragment",
MTLPixelFormat::BGRA8Unorm,
sample_count,
);
let command_queue = device.new_command_queue();
let sprite_atlas = Arc::new(MetalAtlas::new(device.clone()));
let core_video_texture_cache =
CVMetalTextureCache::new(None, device.clone(), None).unwrap();
let msaa_texture = create_msaa_texture(&device, &layer, sample_count);
Self {
device,
layer,
presents_with_transaction: false,
command_queue,
path_pipeline_state,
shadows_pipeline_state,
quads_pipeline_state,
underlines_pipeline_state,
monochrome_sprites_pipeline_state,
polychrome_sprites_pipeline_state,
surfaces_pipeline_state,
unit_vertices,
instance_buffer_pool,
sprite_atlas,
core_video_texture_cache,
sample_count,
msaa_texture,
}
}
pub fn layer(&self) -> &metal::MetalLayerRef {
&self.layer
}
pub fn layer_ptr(&self) -> *mut CAMetalLayer {
self.layer.as_ptr()
}
pub fn sprite_atlas(&self) -> &Arc<MetalAtlas> {
&self.sprite_atlas
}
pub fn set_presents_with_transaction(&mut self, presents_with_transaction: bool) {
self.presents_with_transaction = presents_with_transaction;
self.layer
.set_presents_with_transaction(presents_with_transaction);
}
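// Resizes the layer's drawable and recreates the MSAA texture to match the
// new size.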
pub fn update_drawable_size(&mut self, size: Size<DevicePixels>) {
let size = NSSize {
width: size.width.0 as f64,
height: size.height.0 as f64,
};
unsafe {
let _: () = msg_send![
self.layer(),
setDrawableSize: size
];
}
self.msaa_texture = create_msaa_texture(&self.device, &self.layer, self.sample_count);
}
pub fn update_transparency(&self, _transparent: bool) {
// todo(mac)?
}
pub fn destroy(&self) {
// nothing to do
}
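// Draws a frame: acquires an instance buffer, encodes the scene, and presents
// the drawable. If the scene does not fit in the current instance buffer, the
// pool's buffer size is doubled (up to 256 MiB) and the frame is retried.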
pub fn draw(&mut self, scene: &Scene) {
let layer = self.layer.clone();
let viewport_size = layer.drawable_size();
let viewport_size: Size<DevicePixels> = size(
(viewport_size.width.ceil() as i32).into(),
(viewport_size.height.ceil() as i32).into(),
);
let drawable = if let Some(drawable) = layer.next_drawable() {
drawable
} else {
log::error!(
"failed to retrieve next drawable, drawable size: {:?}",
viewport_size
);
return;
};
loop {
let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);
let command_buffer =
self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);
match command_buffer {
Ok(command_buffer) => {
let instance_buffer_pool = self.instance_buffer_pool.clone();
let instance_buffer = Cell::new(Some(instance_buffer));
let block = ConcreteBlock::new(move |_| {
if let Some(instance_buffer) = instance_buffer.take() {
instance_buffer_pool.lock().release(instance_buffer);
}
});
let block = block.copy();
command_buffer.add_completed_handler(&block);
if self.presents_with_transaction {
command_buffer.commit();
command_buffer.wait_until_scheduled();
drawable.present();
} else {
command_buffer.present_drawable(drawable);
command_buffer.commit();
}
return;
}
Err(err) => {
log::error!(
"failed to render: {}. retrying with larger instance buffer size",
err
);
let mut instance_buffer_pool = self.instance_buffer_pool.lock();
let buffer_size = instance_buffer_pool.buffer_size;
if buffer_size >= 256 * 1024 * 1024 {
log::error!("instance buffer size grew too large: {}", buffer_size);
break;
}
instance_buffer_pool.reset(buffer_size * 2);
log::info!(
"increased instance buffer size to {}",
instance_buffer_pool.buffer_size
);
}
}
}
}
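// Encodes a single render pass for the whole scene. When multisampling is
// available, rendering targets the MSAA texture and is resolved into the
// drawable at store time; otherwise it renders straight into the drawable.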
fn draw_primitives(
&mut self,
scene: &Scene,
instance_buffer: &mut InstanceBuffer,
drawable: &metal::MetalDrawableRef,
viewport_size: Size<DevicePixels>,
) -> Result<metal::CommandBuffer> {
let command_queue = self.command_queue.clone();
let command_buffer = command_queue.new_command_buffer();
let mut instance_offset = 0;
let render_pass_descriptor = metal::RenderPassDescriptor::new();
let color_attachment = render_pass_descriptor
.color_attachments()
.object_at(0)
.unwrap();
if let Some(msaa_texture_ref) = self.msaa_texture.as_deref() {
color_attachment.set_texture(Some(msaa_texture_ref));
color_attachment.set_load_action(metal::MTLLoadAction::Clear);
color_attachment.set_store_action(metal::MTLStoreAction::MultisampleResolve);
color_attachment.set_resolve_texture(Some(drawable.texture()));
} else {
color_attachment.set_load_action(metal::MTLLoadAction::Clear);
color_attachment.set_texture(Some(drawable.texture()));
color_attachment.set_store_action(metal::MTLStoreAction::Store);
}
let alpha = if self.layer.is_opaque() { 1. } else { 0. };
color_attachment.set_clear_color(metal::MTLClearColor::new(0., 0., 0., alpha));
let command_encoder = command_buffer.new_render_command_encoder(render_pass_descriptor);
command_encoder.set_viewport(metal::MTLViewport {
originX: 0.0,
originY: 0.0,
width: i32::from(viewport_size.width) as f64,
height: i32::from(viewport_size.height) as f64,
znear: 0.0,
zfar: 1.0,
});
for batch in scene.batches() {
let ok = match batch {
PrimitiveBatch::Shadows(shadows) => self.draw_shadows(
shadows,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
PrimitiveBatch::Quads(quads) => self.draw_quads(
quads,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
PrimitiveBatch::Paths(paths) => self.draw_paths(
paths,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
PrimitiveBatch::Underlines(underlines) => self.draw_underlines(
underlines,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
PrimitiveBatch::MonochromeSprites {
texture_id,
sprites,
} => self.draw_monochrome_sprites(
texture_id,
sprites,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
PrimitiveBatch::PolychromeSprites {
texture_id,
sprites,
} => self.draw_polychrome_sprites(
texture_id,
sprites,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
PrimitiveBatch::Surfaces(surfaces) => self.draw_surfaces(
surfaces,
instance_buffer,
&mut instance_offset,
viewport_size,
command_encoder,
),
};
if !ok {
command_encoder.end_encoding();
anyhow::bail!(
"scene too large: {} paths, {} shadows, {} quads, {} underlines, {} mono, {} poly, {} surfaces",
scene.paths.len(),
scene.shadows.len(),
scene.quads.len(),
scene.underlines.len(),
scene.monochrome_sprites.len(),
scene.polychrome_sprites.len(),
scene.surfaces.len(),
);
}
}
command_encoder.end_encoding();
instance_buffer.metal_buffer.did_modify_range(NSRange {
location: 0,
length: instance_offset as NSUInteger,
});
Ok(command_buffer.to_owned())
}
fn draw_shadows(
&self,
shadows: &[Shadow],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
if shadows.is_empty() {
return true;
}
align_offset(instance_offset);
command_encoder.set_render_pipeline_state(&self.shadows_pipeline_state);
command_encoder.set_vertex_buffer(
ShadowInputIndex::Vertices as u64,
Some(&self.unit_vertices),
0,
);
command_encoder.set_vertex_buffer(
ShadowInputIndex::Shadows as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_fragment_buffer(
ShadowInputIndex::Shadows as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_vertex_bytes(
ShadowInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
let shadow_bytes_len = mem::size_of_val(shadows);
let buffer_contents =
unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };
let next_offset = *instance_offset + shadow_bytes_len;
if next_offset > instance_buffer.size {
return false;
}
unsafe {
ptr::copy_nonoverlapping(
shadows.as_ptr() as *const u8,
buffer_contents,
shadow_bytes_len,
);
}
command_encoder.draw_primitives_instanced(
metal::MTLPrimitiveType::Triangle,
0,
6,
shadows.len() as u64,
);
*instance_offset = next_offset;
true
}
fn draw_quads(
&self,
quads: &[Quad],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
if quads.is_empty() {
return true;
}
align_offset(instance_offset);
command_encoder.set_render_pipeline_state(&self.quads_pipeline_state);
command_encoder.set_vertex_buffer(
QuadInputIndex::Vertices as u64,
Some(&self.unit_vertices),
0,
);
command_encoder.set_vertex_buffer(
QuadInputIndex::Quads as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_fragment_buffer(
QuadInputIndex::Quads as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_vertex_bytes(
QuadInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
let quad_bytes_len = mem::size_of_val(quads);
let buffer_contents =
unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };
let next_offset = *instance_offset + quad_bytes_len;
if next_offset > instance_buffer.size {
return false;
}
unsafe {
ptr::copy_nonoverlapping(quads.as_ptr() as *const u8, buffer_contents, quad_bytes_len);
}
command_encoder.draw_primitives_instanced(
metal::MTLPrimitiveType::Triangle,
0,
6,
quads.len() as u64,
);
*instance_offset = next_offset;
true
}
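// Draws paths directly into the current render target, with no intermediate
// path textures. Path vertices, per-path sprite data, and indirect draw
// arguments are all packed into the instance buffer, and each path is issued
// as one indirect draw; anti-aliasing comes from the pass's multisampled
// target.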
fn draw_paths(
&self,
paths: &[Path<ScaledPixels>],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
if paths.is_empty() {
return true;
}
command_encoder.set_render_pipeline_state(&self.path_pipeline_state);
unsafe {
let base_addr = instance_buffer.metal_buffer.contents();
let mut p = (base_addr as *mut u8).add(*instance_offset);
let mut draw_indirect_commands = Vec::with_capacity(paths.len());
// copy vertices
let vertices_offset = (p as usize) - (base_addr as usize);
let mut first_vertex = 0;
for (i, path) in paths.iter().enumerate() {
if (p as usize) - (base_addr as usize)
+ (mem::size_of::<PathVertex<ScaledPixels>>() * path.vertices.len())
> instance_buffer.size
{
return false;
}
for v in &path.vertices {
*(p as *mut PathVertex<ScaledPixels>) = PathVertex {
xy_position: v.xy_position,
content_mask: ContentMask {
bounds: path.content_mask.bounds,
},
};
p = p.add(mem::size_of::<PathVertex<ScaledPixels>>());
}
draw_indirect_commands.push(MTLDrawPrimitivesIndirectArguments {
vertexCount: path.vertices.len() as u32,
instanceCount: 1,
vertexStart: first_vertex,
baseInstance: i as u32,
});
first_vertex += path.vertices.len() as u32;
}
// copy sprites
let sprites_offset = (p as u64) - (base_addr as u64);
if (p as usize) - (base_addr as usize) + (mem::size_of::<PathSprite>() * paths.len())
> instance_buffer.size
{
return false;
}
for path in paths {
*(p as *mut PathSprite) = PathSprite {
bounds: path.bounds,
color: path.color,
};
p = p.add(mem::size_of::<PathSprite>());
}
// copy indirect commands
let icb_bytes_len = mem::size_of_val(draw_indirect_commands.as_slice());
let icb_offset = (p as u64) - (base_addr as u64);
if (p as usize) - (base_addr as usize) + icb_bytes_len > instance_buffer.size {
return false;
}
ptr::copy_nonoverlapping(
draw_indirect_commands.as_ptr() as *const u8,
p,
icb_bytes_len,
);
p = p.add(icb_bytes_len);
// draw path
command_encoder.set_vertex_buffer(
PathInputIndex::Vertices as u64,
Some(&instance_buffer.metal_buffer),
vertices_offset as u64,
);
command_encoder.set_vertex_bytes(
PathInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
command_encoder.set_vertex_buffer(
PathInputIndex::Sprites as u64,
Some(&instance_buffer.metal_buffer),
sprites_offset,
);
command_encoder.set_fragment_buffer(
PathInputIndex::Sprites as u64,
Some(&instance_buffer.metal_buffer),
sprites_offset,
);
for i in 0..paths.len() {
command_encoder.draw_primitives_indirect(
metal::MTLPrimitiveType::Triangle,
&instance_buffer.metal_buffer,
icb_offset
+ (i * std::mem::size_of::<MTLDrawPrimitivesIndirectArguments>()) as u64,
);
}
*instance_offset = (p as usize) - (base_addr as usize);
}
true
}
fn draw_underlines(
&self,
underlines: &[Underline],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
if underlines.is_empty() {
return true;
}
align_offset(instance_offset);
command_encoder.set_render_pipeline_state(&self.underlines_pipeline_state);
command_encoder.set_vertex_buffer(
UnderlineInputIndex::Vertices as u64,
Some(&self.unit_vertices),
0,
);
command_encoder.set_vertex_buffer(
UnderlineInputIndex::Underlines as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_fragment_buffer(
UnderlineInputIndex::Underlines as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_vertex_bytes(
UnderlineInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
let underline_bytes_len = mem::size_of_val(underlines);
let buffer_contents =
unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };
let next_offset = *instance_offset + underline_bytes_len;
if next_offset > instance_buffer.size {
return false;
}
unsafe {
ptr::copy_nonoverlapping(
underlines.as_ptr() as *const u8,
buffer_contents,
underline_bytes_len,
);
}
command_encoder.draw_primitives_instanced(
metal::MTLPrimitiveType::Triangle,
0,
6,
underlines.len() as u64,
);
*instance_offset = next_offset;
true
}
fn draw_monochrome_sprites(
&self,
texture_id: AtlasTextureId,
sprites: &[MonochromeSprite],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
if sprites.is_empty() {
return true;
}
align_offset(instance_offset);
let sprite_bytes_len = mem::size_of_val(sprites);
let buffer_contents =
unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };
let next_offset = *instance_offset + sprite_bytes_len;
if next_offset > instance_buffer.size {
return false;
}
let texture = self.sprite_atlas.metal_texture(texture_id);
let texture_size = size(
DevicePixels(texture.width() as i32),
DevicePixels(texture.height() as i32),
);
command_encoder.set_render_pipeline_state(&self.monochrome_sprites_pipeline_state);
command_encoder.set_vertex_buffer(
SpriteInputIndex::Vertices as u64,
Some(&self.unit_vertices),
0,
);
command_encoder.set_vertex_buffer(
SpriteInputIndex::Sprites as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_vertex_bytes(
SpriteInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
command_encoder.set_vertex_bytes(
SpriteInputIndex::AtlasTextureSize as u64,
mem::size_of_val(&texture_size) as u64,
&texture_size as *const Size<DevicePixels> as *const _,
);
command_encoder.set_fragment_buffer(
SpriteInputIndex::Sprites as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_fragment_texture(SpriteInputIndex::AtlasTexture as u64, Some(&texture));
unsafe {
ptr::copy_nonoverlapping(
sprites.as_ptr() as *const u8,
buffer_contents,
sprite_bytes_len,
);
}
command_encoder.draw_primitives_instanced(
metal::MTLPrimitiveType::Triangle,
0,
6,
sprites.len() as u64,
);
*instance_offset = next_offset;
true
}
fn draw_polychrome_sprites(
&self,
texture_id: AtlasTextureId,
sprites: &[PolychromeSprite],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
if sprites.is_empty() {
return true;
}
align_offset(instance_offset);
let texture = self.sprite_atlas.metal_texture(texture_id);
let texture_size = size(
DevicePixels(texture.width() as i32),
DevicePixels(texture.height() as i32),
);
command_encoder.set_render_pipeline_state(&self.polychrome_sprites_pipeline_state);
command_encoder.set_vertex_buffer(
SpriteInputIndex::Vertices as u64,
Some(&self.unit_vertices),
0,
);
command_encoder.set_vertex_buffer(
SpriteInputIndex::Sprites as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_vertex_bytes(
SpriteInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
command_encoder.set_vertex_bytes(
SpriteInputIndex::AtlasTextureSize as u64,
mem::size_of_val(&texture_size) as u64,
&texture_size as *const Size<DevicePixels> as *const _,
);
command_encoder.set_fragment_buffer(
SpriteInputIndex::Sprites as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_fragment_texture(SpriteInputIndex::AtlasTexture as u64, Some(&texture));
let sprite_bytes_len = mem::size_of_val(sprites);
let buffer_contents =
unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };
let next_offset = *instance_offset + sprite_bytes_len;
if next_offset > instance_buffer.size {
return false;
}
unsafe {
ptr::copy_nonoverlapping(
sprites.as_ptr() as *const u8,
buffer_contents,
sprite_bytes_len,
);
}
command_encoder.draw_primitives_instanced(
metal::MTLPrimitiveType::Triangle,
0,
6,
sprites.len() as u64,
);
*instance_offset = next_offset;
true
}
fn draw_surfaces(
&mut self,
surfaces: &[PaintSurface],
instance_buffer: &mut InstanceBuffer,
instance_offset: &mut usize,
viewport_size: Size<DevicePixels>,
command_encoder: &metal::RenderCommandEncoderRef,
) -> bool {
command_encoder.set_render_pipeline_state(&self.surfaces_pipeline_state);
command_encoder.set_vertex_buffer(
SurfaceInputIndex::Vertices as u64,
Some(&self.unit_vertices),
0,
);
command_encoder.set_vertex_bytes(
SurfaceInputIndex::ViewportSize as u64,
mem::size_of_val(&viewport_size) as u64,
&viewport_size as *const Size<DevicePixels> as *const _,
);
for surface in surfaces {
let texture_size = size(
DevicePixels::from(surface.image_buffer.get_width() as i32),
DevicePixels::from(surface.image_buffer.get_height() as i32),
);
assert_eq!(
surface.image_buffer.get_pixel_format(),
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange
);
let y_texture = self
.core_video_texture_cache
.create_texture_from_image(
surface.image_buffer.as_concrete_TypeRef(),
None,
MTLPixelFormat::R8Unorm,
surface.image_buffer.get_width_of_plane(0),
surface.image_buffer.get_height_of_plane(0),
0,
)
.unwrap();
let cb_cr_texture = self
.core_video_texture_cache
.create_texture_from_image(
surface.image_buffer.as_concrete_TypeRef(),
None,
MTLPixelFormat::RG8Unorm,
surface.image_buffer.get_width_of_plane(1),
surface.image_buffer.get_height_of_plane(1),
1,
)
.unwrap();
align_offset(instance_offset);
let next_offset = *instance_offset + mem::size_of::<Surface>();
if next_offset > instance_buffer.size {
return false;
}
command_encoder.set_vertex_buffer(
SurfaceInputIndex::Surfaces as u64,
Some(&instance_buffer.metal_buffer),
*instance_offset as u64,
);
command_encoder.set_vertex_bytes(
SurfaceInputIndex::TextureSize as u64,
mem::size_of_val(&texture_size) as u64,
&texture_size as *const Size<DevicePixels> as *const _,
);
command_encoder.set_fragment_texture(SurfaceInputIndex::YTexture as u64, unsafe {
let texture = CVMetalTextureGetTexture(y_texture.as_concrete_TypeRef());
Some(metal::TextureRef::from_ptr(texture as *mut _))
});
command_encoder.set_fragment_texture(SurfaceInputIndex::CbCrTexture as u64, unsafe {
let texture = CVMetalTextureGetTexture(cb_cr_texture.as_concrete_TypeRef());
Some(metal::TextureRef::from_ptr(texture as *mut _))
});
unsafe {
let buffer_contents = (instance_buffer.metal_buffer.contents() as *mut u8)
.add(*instance_offset)
as *mut SurfaceBounds;
ptr::write(
buffer_contents,
SurfaceBounds {
bounds: surface.bounds,
content_mask: surface.content_mask.clone(),
},
);
}
command_encoder.draw_primitives(metal::MTLPrimitiveType::Triangle, 0, 6);
*instance_offset = next_offset;
}
true
}
}
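// Builds a render pipeline for the given vertex/fragment shader pair with
// alpha blending enabled and the shared sample count, so every pipeline is
// compatible with the (optional) multisampled render target.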
fn build_pipeline_state(
device: &metal::DeviceRef,
library: &metal::LibraryRef,
label: &str,
vertex_fn_name: &str,
fragment_fn_name: &str,
pixel_format: metal::MTLPixelFormat,
sample_count: u64,
) -> metal::RenderPipelineState {
let vertex_fn = library
.get_function(vertex_fn_name, None)
.expect("error locating vertex function");
let fragment_fn = library
.get_function(fragment_fn_name, None)
.expect("error locating fragment function");
let descriptor = metal::RenderPipelineDescriptor::new();
descriptor.set_label(label);
descriptor.set_vertex_function(Some(vertex_fn.as_ref()));
descriptor.set_fragment_function(Some(fragment_fn.as_ref()));
descriptor.set_sample_count(sample_count);
let color_attachment = descriptor.color_attachments().object_at(0).unwrap();
color_attachment.set_pixel_format(pixel_format);
color_attachment.set_blending_enabled(true);
color_attachment.set_rgb_blend_operation(metal::MTLBlendOperation::Add);
color_attachment.set_alpha_blend_operation(metal::MTLBlendOperation::Add);
color_attachment.set_source_rgb_blend_factor(metal::MTLBlendFactor::SourceAlpha);
color_attachment.set_source_alpha_blend_factor(metal::MTLBlendFactor::One);
color_attachment.set_destination_rgb_blend_factor(metal::MTLBlendFactor::OneMinusSourceAlpha);
color_attachment.set_destination_alpha_blend_factor(metal::MTLBlendFactor::One);
device
.new_render_pipeline_state(&descriptor)
.expect("could not create render pipeline state")
}
// Align offsets to multiples of 256 to keep Metal happy.
fn align_offset(offset: &mut usize) {
*offset = (*offset).div_ceil(256) * 256;
}
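// Creates a multisampled color texture matching the layer's current drawable
// size. Returns `None` when the drawable is empty or multisampling is
// unavailable (sample count <= 1); in that case rendering falls back to
// drawing directly into the drawable.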
fn create_msaa_texture(
device: &metal::Device,
layer: &metal::MetalLayer,
sample_count: u64,
) -> Option<metal::Texture> {
let viewport_size = layer.drawable_size();
let width = viewport_size.width.ceil() as u64;
let height = viewport_size.height.ceil() as u64;
if width == 0 || height == 0 {
return None;
}
if sample_count <= 1 {
return None;
}
let texture_descriptor = metal::TextureDescriptor::new();
texture_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);
// The default MTLStorageMode is `shared` only on Apple silicon GPUs; use
// `private` explicitly so Apple and Intel GPUs behave the same.
// Reference: https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);
texture_descriptor.set_width(width);
texture_descriptor.set_height(height);
texture_descriptor.set_pixel_format(layer.pixel_format());
texture_descriptor.set_usage(metal::MTLTextureUsage::RenderTarget);
texture_descriptor.set_sample_count(sample_count);
let metal_texture = device.new_texture(&texture_descriptor);
Some(metal_texture)
}
#[repr(C)]
enum ShadowInputIndex {
Vertices = 0,
Shadows = 1,
ViewportSize = 2,
}
#[repr(C)]
enum QuadInputIndex {
Vertices = 0,
Quads = 1,
ViewportSize = 2,
}
#[repr(C)]
enum UnderlineInputIndex {
Vertices = 0,
Underlines = 1,
ViewportSize = 2,
}
#[repr(C)]
enum SpriteInputIndex {
Vertices = 0,
Sprites = 1,
ViewportSize = 2,
AtlasTextureSize = 3,
AtlasTexture = 4,
}
#[repr(C)]
enum SurfaceInputIndex {
Vertices = 0,
Surfaces = 1,
ViewportSize = 2,
TextureSize = 3,
YTexture = 4,
CbCrTexture = 5,
}
#[repr(C)]
enum PathInputIndex {
Vertices = 0,
ViewportSize = 1,
Sprites = 2,
}
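// Per-path instance data (bounds and background color) shared by the path
// vertex and fragment shaders.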
#[derive(Clone, Debug, Eq, PartialEq)]
#[repr(C)]
pub struct PathSprite {
pub bounds: Bounds<ScaledPixels>,
pub color: Background,
}
#[derive(Clone, Debug, Eq, PartialEq)]
#[repr(C)]
pub struct SurfaceBounds {
pub bounds: Bounds<ScaledPixels>,
pub content_mask: ContentMask<ScaledPixels>,
}