
Currently, the rendering path requires creating a texture for each path, which wastes a large amount of video memory. In our application, simply drawing some charts pushed video memory usage as high as 5 GB. I removed the step of creating path textures and now draw paths directly onto the render target, adding global multisample anti-aliasing (MSAA) resolved at the end of the pass. Drawing paths no longer allocates any additional video memory, and path rendering performance also improves.

Release Notes:

- N/A

---------

Co-authored-by: Jason Lee <huacnlee@gmail.com>
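At its core, the change renders the whole scene into a multisampled color attachment that is resolved into the drawable, so paths get anti-aliasing without any per-path texture. A minimal sketch of the pass setup, mirroring `draw_primitives` in the file below (names are from this file):

```rust
// Render into a multisampled texture, then resolve into the drawable,
// instead of rasterizing each path into its own intermediate texture.
let color_attachment = render_pass_descriptor
    .color_attachments()
    .object_at(0)
    .unwrap();
color_attachment.set_texture(Some(msaa_texture_ref)); // multisampled render target
color_attachment.set_load_action(metal::MTLLoadAction::Clear);
color_attachment.set_store_action(metal::MTLStoreAction::MultisampleResolve);
color_attachment.set_resolve_texture(Some(drawable.texture())); // anti-aliased output
```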
use super::metal_atlas::MetalAtlas;
use crate::{
    AtlasTextureId, Background, Bounds, ContentMask, DevicePixels, MonochromeSprite, PaintSurface,
    Path, PathVertex, PolychromeSprite, PrimitiveBatch, Quad, ScaledPixels, Scene, Shadow, Size,
    Surface, Underline, point, size,
};
use anyhow::Result;
use block::ConcreteBlock;
use cocoa::{
    base::{NO, YES},
    foundation::{NSSize, NSUInteger},
    quartzcore::AutoresizingMask,
};
use core_foundation::base::TCFType;
use core_video::{
    metal_texture::CVMetalTextureGetTexture, metal_texture_cache::CVMetalTextureCache,
    pixel_buffer::kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
};
use foreign_types::{ForeignType, ForeignTypeRef};
use metal::{
    CAMetalLayer, CommandQueue, MTLDrawPrimitivesIndirectArguments, MTLPixelFormat,
    MTLResourceOptions, NSRange,
};
use objc::{self, msg_send, sel, sel_impl};
use parking_lot::Mutex;
use std::{cell::Cell, ffi::c_void, mem, ptr, sync::Arc};

// Exported to metal
pub(crate) type PointF = crate::Point<f32>;

#[cfg(not(feature = "runtime_shaders"))]
const SHADERS_METALLIB: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/shaders.metallib"));
#[cfg(feature = "runtime_shaders")]
const SHADERS_SOURCE_FILE: &str = include_str!(concat!(env!("OUT_DIR"), "/stitched_shaders.metal"));

pub type Context = Arc<Mutex<InstanceBufferPool>>;
pub type Renderer = MetalRenderer;
pub unsafe fn new_renderer(
    context: self::Context,
    _native_window: *mut c_void,
    _native_view: *mut c_void,
    _bounds: crate::Size<f32>,
    _transparent: bool,
) -> Renderer {
    MetalRenderer::new(context)
}
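/// A pool of reusable instance buffers. Each frame's primitive data is copied
/// into a single buffer acquired from this pool; when a scene does not fit,
/// `draw` resets the pool with a doubled `buffer_size` and retries the frame.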
pub(crate) struct InstanceBufferPool {
    buffer_size: usize,
    buffers: Vec<metal::Buffer>,
}

impl Default for InstanceBufferPool {
    fn default() -> Self {
        Self {
            buffer_size: 2 * 1024 * 1024,
            buffers: Vec::new(),
        }
    }
}

pub(crate) struct InstanceBuffer {
    metal_buffer: metal::Buffer,
    size: usize,
}
impl InstanceBufferPool {
    pub(crate) fn reset(&mut self, buffer_size: usize) {
        self.buffer_size = buffer_size;
        self.buffers.clear();
    }

    pub(crate) fn acquire(&mut self, device: &metal::Device) -> InstanceBuffer {
        let buffer = self.buffers.pop().unwrap_or_else(|| {
            device.new_buffer(
                self.buffer_size as u64,
                MTLResourceOptions::StorageModeManaged,
            )
        });
        InstanceBuffer {
            metal_buffer: buffer,
            size: self.buffer_size,
        }
    }

    pub(crate) fn release(&mut self, buffer: InstanceBuffer) {
        if buffer.size == self.buffer_size {
            self.buffers.push(buffer.metal_buffer)
        }
    }
}
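/// Metal-backed renderer. Paths are drawn directly into the frame's render
/// target; anti-aliasing comes from the multisampled `msaa_texture` (when
/// `sample_count > 1`), which is resolved into the drawable each frame.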
pub(crate) struct MetalRenderer {
    device: metal::Device,
    layer: metal::MetalLayer,
    presents_with_transaction: bool,
    command_queue: CommandQueue,
    path_pipeline_state: metal::RenderPipelineState,
    shadows_pipeline_state: metal::RenderPipelineState,
    quads_pipeline_state: metal::RenderPipelineState,
    underlines_pipeline_state: metal::RenderPipelineState,
    monochrome_sprites_pipeline_state: metal::RenderPipelineState,
    polychrome_sprites_pipeline_state: metal::RenderPipelineState,
    surfaces_pipeline_state: metal::RenderPipelineState,
    unit_vertices: metal::Buffer,
    #[allow(clippy::arc_with_non_send_sync)]
    instance_buffer_pool: Arc<Mutex<InstanceBufferPool>>,
    sprite_atlas: Arc<MetalAtlas>,
    core_video_texture_cache: core_video::metal_texture_cache::CVMetalTextureCache,
    sample_count: u64,
    msaa_texture: Option<metal::Texture>,
}
impl MetalRenderer {
    pub fn new(instance_buffer_pool: Arc<Mutex<InstanceBufferPool>>) -> Self {
        // Prefer low-power integrated GPUs on Intel Mac. On Apple
        // Silicon, there is only ever one GPU, so this is equivalent to
        // `metal::Device::system_default()`.
        let mut devices = metal::Device::all();
        devices.sort_by_key(|device| (device.is_removable(), device.is_low_power()));
        let Some(device) = devices.pop() else {
            log::error!("unable to access a compatible graphics device");
            std::process::exit(1);
        };

        let layer = metal::MetalLayer::new();
        layer.set_device(&device);
        layer.set_pixel_format(MTLPixelFormat::BGRA8Unorm);
        layer.set_opaque(false);
        layer.set_maximum_drawable_count(3);
        unsafe {
            let _: () = msg_send![&*layer, setAllowsNextDrawableTimeout: NO];
            let _: () = msg_send![&*layer, setNeedsDisplayOnBoundsChange: YES];
            let _: () = msg_send![
                &*layer,
                setAutoresizingMask: AutoresizingMask::WIDTH_SIZABLE
                    | AutoresizingMask::HEIGHT_SIZABLE
            ];
        }
        #[cfg(feature = "runtime_shaders")]
        let library = device
            .new_library_with_source(&SHADERS_SOURCE_FILE, &metal::CompileOptions::new())
            .expect("error building metal library");
        #[cfg(not(feature = "runtime_shaders"))]
        let library = device
            .new_library_with_data(SHADERS_METALLIB)
            .expect("error building metal library");

        fn to_float2_bits(point: PointF) -> u64 {
            let mut output = point.y.to_bits() as u64;
            output <<= 32;
            output |= point.x.to_bits() as u64;
            output
        }

        let unit_vertices = [
            to_float2_bits(point(0., 0.)),
            to_float2_bits(point(1., 0.)),
            to_float2_bits(point(0., 1.)),
            to_float2_bits(point(0., 1.)),
            to_float2_bits(point(1., 0.)),
            to_float2_bits(point(1., 1.)),
        ];
        let unit_vertices = device.new_buffer_with_data(
            unit_vertices.as_ptr() as *const c_void,
            mem::size_of_val(&unit_vertices) as u64,
            MTLResourceOptions::StorageModeManaged,
        );

        let sample_count = [4, 2, 1]
            .into_iter()
            .find(|count| device.supports_texture_sample_count(*count))
            .unwrap_or(1);

        let path_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "paths",
            "path_vertex",
            "path_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );
        let shadows_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "shadows",
            "shadow_vertex",
            "shadow_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );
        let quads_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "quads",
            "quad_vertex",
            "quad_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );
        let underlines_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "underlines",
            "underline_vertex",
            "underline_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );
        let monochrome_sprites_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "monochrome_sprites",
            "monochrome_sprite_vertex",
            "monochrome_sprite_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );
        let polychrome_sprites_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "polychrome_sprites",
            "polychrome_sprite_vertex",
            "polychrome_sprite_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );
        let surfaces_pipeline_state = build_pipeline_state(
            &device,
            &library,
            "surfaces",
            "surface_vertex",
            "surface_fragment",
            MTLPixelFormat::BGRA8Unorm,
            sample_count,
        );

        let command_queue = device.new_command_queue();
        let sprite_atlas = Arc::new(MetalAtlas::new(device.clone()));
        let core_video_texture_cache =
            CVMetalTextureCache::new(None, device.clone(), None).unwrap();
        let msaa_texture = create_msaa_texture(&device, &layer, sample_count);

        Self {
            device,
            layer,
            presents_with_transaction: false,
            command_queue,
            path_pipeline_state,
            shadows_pipeline_state,
            quads_pipeline_state,
            underlines_pipeline_state,
            monochrome_sprites_pipeline_state,
            polychrome_sprites_pipeline_state,
            surfaces_pipeline_state,
            unit_vertices,
            instance_buffer_pool,
            sprite_atlas,
            core_video_texture_cache,
            sample_count,
            msaa_texture,
        }
    }
    pub fn layer(&self) -> &metal::MetalLayerRef {
        &self.layer
    }

    pub fn layer_ptr(&self) -> *mut CAMetalLayer {
        self.layer.as_ptr()
    }

    pub fn sprite_atlas(&self) -> &Arc<MetalAtlas> {
        &self.sprite_atlas
    }

    pub fn set_presents_with_transaction(&mut self, presents_with_transaction: bool) {
        self.presents_with_transaction = presents_with_transaction;
        self.layer
            .set_presents_with_transaction(presents_with_transaction);
    }

    pub fn update_drawable_size(&mut self, size: Size<DevicePixels>) {
        let size = NSSize {
            width: size.width.0 as f64,
            height: size.height.0 as f64,
        };
        unsafe {
            let _: () = msg_send![
                self.layer(),
                setDrawableSize: size
            ];
        }

        self.msaa_texture = create_msaa_texture(&self.device, &self.layer, self.sample_count);
    }

    pub fn update_transparency(&self, _transparent: bool) {
        // todo(mac)?
    }

    pub fn destroy(&self) {
        // nothing to do
    }
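    /// Draws a scene, retrying with a doubled instance buffer whenever the
    /// scene does not fit (up to a 256 MiB cap). The buffer is returned to the
    /// pool from the command buffer's completion handler, once the GPU is
    /// done reading it.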
    pub fn draw(&mut self, scene: &Scene) {
        let layer = self.layer.clone();
        let viewport_size = layer.drawable_size();
        let viewport_size: Size<DevicePixels> = size(
            (viewport_size.width.ceil() as i32).into(),
            (viewport_size.height.ceil() as i32).into(),
        );
        let drawable = if let Some(drawable) = layer.next_drawable() {
            drawable
        } else {
            log::error!(
                "failed to retrieve next drawable, drawable size: {:?}",
                viewport_size
            );
            return;
        };

        loop {
            let mut instance_buffer = self.instance_buffer_pool.lock().acquire(&self.device);

            let command_buffer =
                self.draw_primitives(scene, &mut instance_buffer, drawable, viewport_size);

            match command_buffer {
                Ok(command_buffer) => {
                    let instance_buffer_pool = self.instance_buffer_pool.clone();
                    let instance_buffer = Cell::new(Some(instance_buffer));
                    let block = ConcreteBlock::new(move |_| {
                        if let Some(instance_buffer) = instance_buffer.take() {
                            instance_buffer_pool.lock().release(instance_buffer);
                        }
                    });
                    let block = block.copy();
                    command_buffer.add_completed_handler(&block);

                    if self.presents_with_transaction {
                        command_buffer.commit();
                        command_buffer.wait_until_scheduled();
                        drawable.present();
                    } else {
                        command_buffer.present_drawable(drawable);
                        command_buffer.commit();
                    }
                    return;
                }
                Err(err) => {
                    log::error!(
                        "failed to render: {}. retrying with larger instance buffer size",
                        err
                    );
                    let mut instance_buffer_pool = self.instance_buffer_pool.lock();
                    let buffer_size = instance_buffer_pool.buffer_size;
                    if buffer_size >= 256 * 1024 * 1024 {
                        log::error!("instance buffer size grew too large: {}", buffer_size);
                        break;
                    }
                    instance_buffer_pool.reset(buffer_size * 2);
                    log::info!(
                        "increased instance buffer size to {}",
                        instance_buffer_pool.buffer_size
                    );
                }
            }
        }
    }
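    /// Encodes the whole scene in a single render pass. With MSAA enabled the
    /// pass renders into `msaa_texture` and resolves into the drawable's
    /// texture; otherwise it renders into the drawable directly.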
    fn draw_primitives(
        &mut self,
        scene: &Scene,
        instance_buffer: &mut InstanceBuffer,
        drawable: &metal::MetalDrawableRef,
        viewport_size: Size<DevicePixels>,
    ) -> Result<metal::CommandBuffer> {
        let command_queue = self.command_queue.clone();
        let command_buffer = command_queue.new_command_buffer();
        let mut instance_offset = 0;
        let render_pass_descriptor = metal::RenderPassDescriptor::new();
        let color_attachment = render_pass_descriptor
            .color_attachments()
            .object_at(0)
            .unwrap();

        if let Some(msaa_texture_ref) = self.msaa_texture.as_deref() {
            color_attachment.set_texture(Some(msaa_texture_ref));
            color_attachment.set_load_action(metal::MTLLoadAction::Clear);
            color_attachment.set_store_action(metal::MTLStoreAction::MultisampleResolve);
            color_attachment.set_resolve_texture(Some(drawable.texture()));
        } else {
            color_attachment.set_load_action(metal::MTLLoadAction::Clear);
            color_attachment.set_texture(Some(drawable.texture()));
            color_attachment.set_store_action(metal::MTLStoreAction::Store);
        }

        let alpha = if self.layer.is_opaque() { 1. } else { 0. };
        color_attachment.set_clear_color(metal::MTLClearColor::new(0., 0., 0., alpha));
        let command_encoder = command_buffer.new_render_command_encoder(render_pass_descriptor);

        command_encoder.set_viewport(metal::MTLViewport {
            originX: 0.0,
            originY: 0.0,
            width: i32::from(viewport_size.width) as f64,
            height: i32::from(viewport_size.height) as f64,
            znear: 0.0,
            zfar: 1.0,
        });

        for batch in scene.batches() {
            let ok = match batch {
                PrimitiveBatch::Shadows(shadows) => self.draw_shadows(
                    shadows,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
                PrimitiveBatch::Quads(quads) => self.draw_quads(
                    quads,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
                PrimitiveBatch::Paths(paths) => self.draw_paths(
                    paths,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
                PrimitiveBatch::Underlines(underlines) => self.draw_underlines(
                    underlines,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
                PrimitiveBatch::MonochromeSprites {
                    texture_id,
                    sprites,
                } => self.draw_monochrome_sprites(
                    texture_id,
                    sprites,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
                PrimitiveBatch::PolychromeSprites {
                    texture_id,
                    sprites,
                } => self.draw_polychrome_sprites(
                    texture_id,
                    sprites,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
                PrimitiveBatch::Surfaces(surfaces) => self.draw_surfaces(
                    surfaces,
                    instance_buffer,
                    &mut instance_offset,
                    viewport_size,
                    command_encoder,
                ),
            };

            if !ok {
                command_encoder.end_encoding();
                anyhow::bail!(
                    "scene too large: {} paths, {} shadows, {} quads, {} underlines, {} mono, {} poly, {} surfaces",
                    scene.paths.len(),
                    scene.shadows.len(),
                    scene.quads.len(),
                    scene.underlines.len(),
                    scene.monochrome_sprites.len(),
                    scene.polychrome_sprites.len(),
                    scene.surfaces.len(),
                );
            }
        }

        command_encoder.end_encoding();

        instance_buffer.metal_buffer.did_modify_range(NSRange {
            location: 0,
            length: instance_offset as NSUInteger,
        });
        Ok(command_buffer.to_owned())
    }
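    // Each `draw_*` method below follows the same pattern: align the offset to
    // 256 bytes, copy the batch's instances into the shared instance buffer,
    // bind it, and issue an instanced draw. Returning `false` signals that the
    // buffer is full and the frame should be retried with a larger one.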
    fn draw_shadows(
        &self,
        shadows: &[Shadow],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        if shadows.is_empty() {
            return true;
        }
        align_offset(instance_offset);

        command_encoder.set_render_pipeline_state(&self.shadows_pipeline_state);
        command_encoder.set_vertex_buffer(
            ShadowInputIndex::Vertices as u64,
            Some(&self.unit_vertices),
            0,
        );
        command_encoder.set_vertex_buffer(
            ShadowInputIndex::Shadows as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_fragment_buffer(
            ShadowInputIndex::Shadows as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );

        command_encoder.set_vertex_bytes(
            ShadowInputIndex::ViewportSize as u64,
            mem::size_of_val(&viewport_size) as u64,
            &viewport_size as *const Size<DevicePixels> as *const _,
        );

        let shadow_bytes_len = mem::size_of_val(shadows);
        let buffer_contents =
            unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };

        let next_offset = *instance_offset + shadow_bytes_len;
        if next_offset > instance_buffer.size {
            return false;
        }

        unsafe {
            ptr::copy_nonoverlapping(
                shadows.as_ptr() as *const u8,
                buffer_contents,
                shadow_bytes_len,
            );
        }

        command_encoder.draw_primitives_instanced(
            metal::MTLPrimitiveType::Triangle,
            0,
            6,
            shadows.len() as u64,
        );
        *instance_offset = next_offset;
        true
    }
    fn draw_quads(
        &self,
        quads: &[Quad],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        if quads.is_empty() {
            return true;
        }
        align_offset(instance_offset);

        command_encoder.set_render_pipeline_state(&self.quads_pipeline_state);
        command_encoder.set_vertex_buffer(
            QuadInputIndex::Vertices as u64,
            Some(&self.unit_vertices),
            0,
        );
        command_encoder.set_vertex_buffer(
            QuadInputIndex::Quads as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_fragment_buffer(
            QuadInputIndex::Quads as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );

        command_encoder.set_vertex_bytes(
            QuadInputIndex::ViewportSize as u64,
            mem::size_of_val(&viewport_size) as u64,
            &viewport_size as *const Size<DevicePixels> as *const _,
        );

        let quad_bytes_len = mem::size_of_val(quads);
        let buffer_contents =
            unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };

        let next_offset = *instance_offset + quad_bytes_len;
        if next_offset > instance_buffer.size {
            return false;
        }

        unsafe {
            ptr::copy_nonoverlapping(quads.as_ptr() as *const u8, buffer_contents, quad_bytes_len);
        }

        command_encoder.draw_primitives_instanced(
            metal::MTLPrimitiveType::Triangle,
            0,
            6,
            quads.len() as u64,
        );
        *instance_offset = next_offset;
        true
    }
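    /// Draws paths directly into the current render target, with no
    /// intermediate path textures: vertices, per-path sprites, and indirect
    /// draw arguments are all packed into the instance buffer, then each path
    /// is drawn via `draw_primitives_indirect`. Anti-aliasing comes from the
    /// pass-level MSAA resolve rather than from rasterizing into an atlas.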
    fn draw_paths(
        &self,
        paths: &[Path<ScaledPixels>],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        if paths.is_empty() {
            return true;
        }

        command_encoder.set_render_pipeline_state(&self.path_pipeline_state);

        unsafe {
            let base_addr = instance_buffer.metal_buffer.contents();
            let mut p = (base_addr as *mut u8).add(*instance_offset);
            let mut draw_indirect_commands = Vec::with_capacity(paths.len());

            // copy vertices
            let vertices_offset = (p as usize) - (base_addr as usize);
            let mut first_vertex = 0;
            for (i, path) in paths.iter().enumerate() {
                if (p as usize) - (base_addr as usize)
                    + (mem::size_of::<PathVertex<ScaledPixels>>() * path.vertices.len())
                    > instance_buffer.size
                {
                    return false;
                }

                for v in &path.vertices {
                    *(p as *mut PathVertex<ScaledPixels>) = PathVertex {
                        xy_position: v.xy_position,
                        content_mask: ContentMask {
                            bounds: path.content_mask.bounds,
                        },
                    };
                    p = p.add(mem::size_of::<PathVertex<ScaledPixels>>());
                }

                draw_indirect_commands.push(MTLDrawPrimitivesIndirectArguments {
                    vertexCount: path.vertices.len() as u32,
                    instanceCount: 1,
                    vertexStart: first_vertex,
                    baseInstance: i as u32,
                });
                first_vertex += path.vertices.len() as u32;
            }

            // copy sprites
            let sprites_offset = (p as u64) - (base_addr as u64);
            if (p as usize) - (base_addr as usize) + (mem::size_of::<PathSprite>() * paths.len())
                > instance_buffer.size
            {
                return false;
            }
            for path in paths {
                *(p as *mut PathSprite) = PathSprite {
                    bounds: path.bounds,
                    color: path.color,
                };
                p = p.add(mem::size_of::<PathSprite>());
            }

            // copy indirect commands
            let icb_bytes_len = mem::size_of_val(draw_indirect_commands.as_slice());
            let icb_offset = (p as u64) - (base_addr as u64);
            if (p as usize) - (base_addr as usize) + icb_bytes_len > instance_buffer.size {
                return false;
            }
            ptr::copy_nonoverlapping(
                draw_indirect_commands.as_ptr() as *const u8,
                p,
                icb_bytes_len,
            );
            p = p.add(icb_bytes_len);

            // draw path
            command_encoder.set_vertex_buffer(
                PathInputIndex::Vertices as u64,
                Some(&instance_buffer.metal_buffer),
                vertices_offset as u64,
            );

            command_encoder.set_vertex_bytes(
                PathInputIndex::ViewportSize as u64,
                mem::size_of_val(&viewport_size) as u64,
                &viewport_size as *const Size<DevicePixels> as *const _,
            );

            command_encoder.set_vertex_buffer(
                PathInputIndex::Sprites as u64,
                Some(&instance_buffer.metal_buffer),
                sprites_offset,
            );

            command_encoder.set_fragment_buffer(
                PathInputIndex::Sprites as u64,
                Some(&instance_buffer.metal_buffer),
                sprites_offset,
            );

            for i in 0..paths.len() {
                command_encoder.draw_primitives_indirect(
                    metal::MTLPrimitiveType::Triangle,
                    &instance_buffer.metal_buffer,
                    icb_offset
                        + (i * std::mem::size_of::<MTLDrawPrimitivesIndirectArguments>()) as u64,
                );
            }

            *instance_offset = (p as usize) - (base_addr as usize);
        }

        true
    }
    fn draw_underlines(
        &self,
        underlines: &[Underline],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        if underlines.is_empty() {
            return true;
        }
        align_offset(instance_offset);

        command_encoder.set_render_pipeline_state(&self.underlines_pipeline_state);
        command_encoder.set_vertex_buffer(
            UnderlineInputIndex::Vertices as u64,
            Some(&self.unit_vertices),
            0,
        );
        command_encoder.set_vertex_buffer(
            UnderlineInputIndex::Underlines as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_fragment_buffer(
            UnderlineInputIndex::Underlines as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );

        command_encoder.set_vertex_bytes(
            UnderlineInputIndex::ViewportSize as u64,
            mem::size_of_val(&viewport_size) as u64,
            &viewport_size as *const Size<DevicePixels> as *const _,
        );

        let underline_bytes_len = mem::size_of_val(underlines);
        let buffer_contents =
            unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };

        let next_offset = *instance_offset + underline_bytes_len;
        if next_offset > instance_buffer.size {
            return false;
        }

        unsafe {
            ptr::copy_nonoverlapping(
                underlines.as_ptr() as *const u8,
                buffer_contents,
                underline_bytes_len,
            );
        }

        command_encoder.draw_primitives_instanced(
            metal::MTLPrimitiveType::Triangle,
            0,
            6,
            underlines.len() as u64,
        );
        *instance_offset = next_offset;
        true
    }
    fn draw_monochrome_sprites(
        &self,
        texture_id: AtlasTextureId,
        sprites: &[MonochromeSprite],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        if sprites.is_empty() {
            return true;
        }
        align_offset(instance_offset);

        let sprite_bytes_len = mem::size_of_val(sprites);
        let buffer_contents =
            unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };

        let next_offset = *instance_offset + sprite_bytes_len;
        if next_offset > instance_buffer.size {
            return false;
        }

        let texture = self.sprite_atlas.metal_texture(texture_id);
        let texture_size = size(
            DevicePixels(texture.width() as i32),
            DevicePixels(texture.height() as i32),
        );
        command_encoder.set_render_pipeline_state(&self.monochrome_sprites_pipeline_state);
        command_encoder.set_vertex_buffer(
            SpriteInputIndex::Vertices as u64,
            Some(&self.unit_vertices),
            0,
        );
        command_encoder.set_vertex_buffer(
            SpriteInputIndex::Sprites as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_vertex_bytes(
            SpriteInputIndex::ViewportSize as u64,
            mem::size_of_val(&viewport_size) as u64,
            &viewport_size as *const Size<DevicePixels> as *const _,
        );
        command_encoder.set_vertex_bytes(
            SpriteInputIndex::AtlasTextureSize as u64,
            mem::size_of_val(&texture_size) as u64,
            &texture_size as *const Size<DevicePixels> as *const _,
        );
        command_encoder.set_fragment_buffer(
            SpriteInputIndex::Sprites as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_fragment_texture(SpriteInputIndex::AtlasTexture as u64, Some(&texture));

        unsafe {
            ptr::copy_nonoverlapping(
                sprites.as_ptr() as *const u8,
                buffer_contents,
                sprite_bytes_len,
            );
        }

        command_encoder.draw_primitives_instanced(
            metal::MTLPrimitiveType::Triangle,
            0,
            6,
            sprites.len() as u64,
        );
        *instance_offset = next_offset;
        true
    }
    fn draw_polychrome_sprites(
        &self,
        texture_id: AtlasTextureId,
        sprites: &[PolychromeSprite],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        if sprites.is_empty() {
            return true;
        }
        align_offset(instance_offset);

        let texture = self.sprite_atlas.metal_texture(texture_id);
        let texture_size = size(
            DevicePixels(texture.width() as i32),
            DevicePixels(texture.height() as i32),
        );
        command_encoder.set_render_pipeline_state(&self.polychrome_sprites_pipeline_state);
        command_encoder.set_vertex_buffer(
            SpriteInputIndex::Vertices as u64,
            Some(&self.unit_vertices),
            0,
        );
        command_encoder.set_vertex_buffer(
            SpriteInputIndex::Sprites as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_vertex_bytes(
            SpriteInputIndex::ViewportSize as u64,
            mem::size_of_val(&viewport_size) as u64,
            &viewport_size as *const Size<DevicePixels> as *const _,
        );
        command_encoder.set_vertex_bytes(
            SpriteInputIndex::AtlasTextureSize as u64,
            mem::size_of_val(&texture_size) as u64,
            &texture_size as *const Size<DevicePixels> as *const _,
        );
        command_encoder.set_fragment_buffer(
            SpriteInputIndex::Sprites as u64,
            Some(&instance_buffer.metal_buffer),
            *instance_offset as u64,
        );
        command_encoder.set_fragment_texture(SpriteInputIndex::AtlasTexture as u64, Some(&texture));

        let sprite_bytes_len = mem::size_of_val(sprites);
        let buffer_contents =
            unsafe { (instance_buffer.metal_buffer.contents() as *mut u8).add(*instance_offset) };

        let next_offset = *instance_offset + sprite_bytes_len;
        if next_offset > instance_buffer.size {
            return false;
        }

        unsafe {
            ptr::copy_nonoverlapping(
                sprites.as_ptr() as *const u8,
                buffer_contents,
                sprite_bytes_len,
            );
        }

        command_encoder.draw_primitives_instanced(
            metal::MTLPrimitiveType::Triangle,
            0,
            6,
            sprites.len() as u64,
        );
        *instance_offset = next_offset;
        true
    }
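    /// Draws video surfaces from biplanar (4:2:0 YCbCr) pixel buffers: the
    /// luma and chroma planes are wrapped as R8/RG8 Metal textures through the
    /// CoreVideo texture cache and sampled by the surface shader.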
    fn draw_surfaces(
        &mut self,
        surfaces: &[PaintSurface],
        instance_buffer: &mut InstanceBuffer,
        instance_offset: &mut usize,
        viewport_size: Size<DevicePixels>,
        command_encoder: &metal::RenderCommandEncoderRef,
    ) -> bool {
        command_encoder.set_render_pipeline_state(&self.surfaces_pipeline_state);
        command_encoder.set_vertex_buffer(
            SurfaceInputIndex::Vertices as u64,
            Some(&self.unit_vertices),
            0,
        );
        command_encoder.set_vertex_bytes(
            SurfaceInputIndex::ViewportSize as u64,
            mem::size_of_val(&viewport_size) as u64,
            &viewport_size as *const Size<DevicePixels> as *const _,
        );

        for surface in surfaces {
            let texture_size = size(
                DevicePixels::from(surface.image_buffer.get_width() as i32),
                DevicePixels::from(surface.image_buffer.get_height() as i32),
            );

            assert_eq!(
                surface.image_buffer.get_pixel_format(),
                kCVPixelFormatType_420YpCbCr8BiPlanarFullRange
            );

            let y_texture = self
                .core_video_texture_cache
                .create_texture_from_image(
                    surface.image_buffer.as_concrete_TypeRef(),
                    None,
                    MTLPixelFormat::R8Unorm,
                    surface.image_buffer.get_width_of_plane(0),
                    surface.image_buffer.get_height_of_plane(0),
                    0,
                )
                .unwrap();
            let cb_cr_texture = self
                .core_video_texture_cache
                .create_texture_from_image(
                    surface.image_buffer.as_concrete_TypeRef(),
                    None,
                    MTLPixelFormat::RG8Unorm,
                    surface.image_buffer.get_width_of_plane(1),
                    surface.image_buffer.get_height_of_plane(1),
                    1,
                )
                .unwrap();

            align_offset(instance_offset);
            let next_offset = *instance_offset + mem::size_of::<Surface>();
            if next_offset > instance_buffer.size {
                return false;
            }

            command_encoder.set_vertex_buffer(
                SurfaceInputIndex::Surfaces as u64,
                Some(&instance_buffer.metal_buffer),
                *instance_offset as u64,
            );
            command_encoder.set_vertex_bytes(
                SurfaceInputIndex::TextureSize as u64,
                mem::size_of_val(&texture_size) as u64,
                &texture_size as *const Size<DevicePixels> as *const _,
            );
            command_encoder.set_fragment_texture(SurfaceInputIndex::YTexture as u64, unsafe {
                let texture = CVMetalTextureGetTexture(y_texture.as_concrete_TypeRef());
                Some(metal::TextureRef::from_ptr(texture as *mut _))
            });
            command_encoder.set_fragment_texture(SurfaceInputIndex::CbCrTexture as u64, unsafe {
                let texture = CVMetalTextureGetTexture(cb_cr_texture.as_concrete_TypeRef());
                Some(metal::TextureRef::from_ptr(texture as *mut _))
            });

            unsafe {
                let buffer_contents = (instance_buffer.metal_buffer.contents() as *mut u8)
                    .add(*instance_offset)
                    as *mut SurfaceBounds;
                ptr::write(
                    buffer_contents,
                    SurfaceBounds {
                        bounds: surface.bounds,
                        content_mask: surface.content_mask.clone(),
                    },
                );
            }

            command_encoder.draw_primitives(metal::MTLPrimitiveType::Triangle, 0, 6);
            *instance_offset = next_offset;
        }
        true
    }
}
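// Builds a render pipeline whose sample count matches the MSAA render target
// and whose color attachment uses `SourceAlpha`/`OneMinusSourceAlpha` blending
// for RGB and additive `One`/`One` blending for alpha.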
fn build_pipeline_state(
    device: &metal::DeviceRef,
    library: &metal::LibraryRef,
    label: &str,
    vertex_fn_name: &str,
    fragment_fn_name: &str,
    pixel_format: metal::MTLPixelFormat,
    sample_count: u64,
) -> metal::RenderPipelineState {
    let vertex_fn = library
        .get_function(vertex_fn_name, None)
        .expect("error locating vertex function");
    let fragment_fn = library
        .get_function(fragment_fn_name, None)
        .expect("error locating fragment function");

    let descriptor = metal::RenderPipelineDescriptor::new();
    descriptor.set_label(label);
    descriptor.set_vertex_function(Some(vertex_fn.as_ref()));
    descriptor.set_fragment_function(Some(fragment_fn.as_ref()));
    descriptor.set_sample_count(sample_count);
    let color_attachment = descriptor.color_attachments().object_at(0).unwrap();
    color_attachment.set_pixel_format(pixel_format);
    color_attachment.set_blending_enabled(true);
    color_attachment.set_rgb_blend_operation(metal::MTLBlendOperation::Add);
    color_attachment.set_alpha_blend_operation(metal::MTLBlendOperation::Add);
    color_attachment.set_source_rgb_blend_factor(metal::MTLBlendFactor::SourceAlpha);
    color_attachment.set_source_alpha_blend_factor(metal::MTLBlendFactor::One);
    color_attachment.set_destination_rgb_blend_factor(metal::MTLBlendFactor::OneMinusSourceAlpha);
    color_attachment.set_destination_alpha_blend_factor(metal::MTLBlendFactor::One);

    device
        .new_render_pipeline_state(&descriptor)
        .expect("could not create render pipeline state")
}
// Align to multiples of 256 to make Metal happy.
fn align_offset(offset: &mut usize) {
    *offset = (*offset).div_ceil(256) * 256;
}
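// Creates the multisampled render target matching the layer's current drawable
// size and pixel format. Returns `None` when multisampling is unavailable
// (`sample_count <= 1`) or the drawable is empty; called again from
// `update_drawable_size` whenever the layer is resized.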
fn create_msaa_texture(
    device: &metal::Device,
    layer: &metal::MetalLayer,
    sample_count: u64,
) -> Option<metal::Texture> {
    let viewport_size = layer.drawable_size();
    let width = viewport_size.width.ceil() as u64;
    let height = viewport_size.height.ceil() as u64;

    if width == 0 || height == 0 {
        return None;
    }

    if sample_count <= 1 {
        return None;
    }

    let texture_descriptor = metal::TextureDescriptor::new();
    texture_descriptor.set_texture_type(metal::MTLTextureType::D2Multisample);

    // The MTLStorageMode default is `shared` only on Apple silicon GPUs. Use `private` for both Apple and Intel GPUs.
    // Reference: https://developer.apple.com/documentation/metal/choosing-a-resource-storage-mode-for-apple-gpus
    texture_descriptor.set_storage_mode(metal::MTLStorageMode::Private);

    texture_descriptor.set_width(width);
    texture_descriptor.set_height(height);
    texture_descriptor.set_pixel_format(layer.pixel_format());
    texture_descriptor.set_usage(metal::MTLTextureUsage::RenderTarget);
    texture_descriptor.set_sample_count(sample_count);

    let metal_texture = device.new_texture(&texture_descriptor);
    Some(metal_texture)
}
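// Argument-table indices shared with the shaders: each variant's value must
// stay in sync with the corresponding buffer/texture binding slot in the Metal
// shader source.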
#[repr(C)]
enum ShadowInputIndex {
    Vertices = 0,
    Shadows = 1,
    ViewportSize = 2,
}

#[repr(C)]
enum QuadInputIndex {
    Vertices = 0,
    Quads = 1,
    ViewportSize = 2,
}

#[repr(C)]
enum UnderlineInputIndex {
    Vertices = 0,
    Underlines = 1,
    ViewportSize = 2,
}

#[repr(C)]
enum SpriteInputIndex {
    Vertices = 0,
    Sprites = 1,
    ViewportSize = 2,
    AtlasTextureSize = 3,
    AtlasTexture = 4,
}

#[repr(C)]
enum SurfaceInputIndex {
    Vertices = 0,
    Surfaces = 1,
    ViewportSize = 2,
    TextureSize = 3,
    YTexture = 4,
    CbCrTexture = 5,
}

#[repr(C)]
enum PathInputIndex {
    Vertices = 0,
    ViewportSize = 1,
    Sprites = 2,
}

#[derive(Clone, Debug, Eq, PartialEq)]
#[repr(C)]
pub struct PathSprite {
    pub bounds: Bounds<ScaledPixels>,
    pub color: Background,
}

#[derive(Clone, Debug, Eq, PartialEq)]
#[repr(C)]
pub struct SurfaceBounds {
    pub bounds: Bounds<ScaledPixels>,
    pub content_mask: ContentMask<ScaledPixels>,
}