zng_view_api/audio.rs
1//! Audio device types.
2
3use std::{num::NonZeroU16, time::Duration};
4
5use bitflags::bitflags;
6use serde::{Deserialize, Serialize};
7use zng_task::channel::IpcBytesCast;
8use zng_txt::Txt;
9use zng_unit::Factor;
10
11use crate::api_extension::{ApiExtensionId, ApiExtensionPayload};
12
crate::declare_id! {
    /// Audio device ID.
    ///
    /// In the View Process this is mapped to a system id.
    ///
    /// In the App Process this is mapped to a unique id, but it does not survive View crashes.
    ///
    /// The View Process defines the ID.
    pub struct AudioDeviceId(_);

    /// ID of a decoded or on demand decoding audio track in the cache.
    ///
    /// The View Process defines the ID.
    pub struct AudioId(_);

    /// Audio playback stream ID.
    ///
    /// In the View Process this is mapped to a system id.
    ///
    /// In the App Process this is a unique id that survives View crashes.
    ///
    /// The App Process defines the ID.
    pub struct AudioOutputId(_);

    /// Audio playback request ID.
    ///
    /// The View Process defines the ID.
    pub struct AudioPlayId(_);

    /// ID of an audio encode task.
    ///
    /// The View Process defines the ID.
    pub struct AudioEncodeId(_);
}
47
48/// Info about an input or output device.
49#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
50#[non_exhaustive]
51pub struct AudioDeviceInfo {
52 /// Device display name.
53 pub name: Txt,
54 /// Device input/output capabilities.
55 pub capabilities: AudioDeviceCapability,
56 /// Input stream modes this device can produce.
57 pub input_modes: Vec<AudioStreamMode>,
58 /// Output stream modes this device can consume.
59 pub output_modes: Vec<AudioStreamMode>,
60}
61
62bitflags! {
63 /// Represents audio device input/output capabilities.
64 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65 pub struct AudioDeviceCapability: u8 {
66 /// Device can generate audio streams.
67 const INPUT = 0b01;
68 /// Device can consume audio streams.
69 const OUTPUT = 0b11;
70 }
71}
72
73/// Represents steam capability of an audio device.
74#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
75#[non_exhaustive]
76pub struct AudioStreamMode {
77 /// Number of audio channels.
78 pub channels: NonZeroU16,
79 /// Minimum and maximum sample rate.
80 pub sample_rate: SampleRate,
81 /// Minimum and maximum supported buffer size.
82 pub buffer_size: BufferSize,
83}
84
85/// Represents the minimum and maximum sample rate per audio channel.
86///
87/// Values are in samples processed per second.
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
89pub struct SampleRate {
90 /// Minimum, inclusive.
91 pub min: u32,
92 /// Maximum, inclusive.
93 pub max: u32,
94}
95
96/// Represents the minimum and maximum supported buffer size for the device.
97#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
98#[non_exhaustive]
99pub enum BufferSize {
100 /// Range in frames per second.
101 Range {
102 /// Minimum, inclusive.
103 min: u32,
104 /// Maximum, inclusive.
105 max: u32,
106 },
107 /// Platform cannot describe buffer size for this device.
108 Unknown,
109}
110
111/// Represent an audio load/decode request.
112#[derive(Debug, Clone, Serialize, Deserialize)]
113#[non_exhaustive]
114pub struct AudioRequest<D> {
115 /// Audio data format.
116 pub format: AudioDataFormat,
117
118 /// Audio data.
119 pub data: D,
120
121 /// Maximum allowed decoded size in bytes.
122 ///
123 /// View-process will avoid decoding and return an error if the track would exceed this limit.
124 pub max_decoded_len: u64,
125
126 /// Defines what tracks are decoded from multi image containers.
127 pub tracks: AudioTracksMode,
128
129 /// Audio is a track (or subtree) of this other audio.
130 ///
131 /// This value is now used by the view-process, it is just returned with the metadata. This is useful when
132 /// an already decoded image is requested after a respawn to maintain the original container structure.
133 pub parent: Option<AudioTrackMetadata>,
134}
135impl<D> AudioRequest<D> {
136 /// New.
137 pub fn new(format: AudioDataFormat, data: D, max_decoded_len: u64) -> Self {
138 Self {
139 format,
140 data,
141 max_decoded_len,
142 tracks: AudioTracksMode::PRIMARY,
143 parent: None,
144 }
145 }
146}
147
148/// Format of the audio bytes.
149#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
150#[non_exhaustive]
151pub enum AudioDataFormat {
152 /// Data is already decoded stream of interleaved `f32` samples.
153 InterleavedF32 {
154 /// Number of channels interleaved in the track.
155 channel_count: u16,
156 /// Samples per second.
157 ///
158 /// A sample is a single sequence of `channel_count`.
159 sample_rate: u32,
160 /// Total duration of the track, if it is known.
161 total_duration: Option<Duration>,
162 },
163
164 /// The audio is encoded.
165 ///
166 /// This file extension maybe identifies the format. Fallback to `Unknown` handling if the file extension
167 /// is unknown or the file header does not match.
168 FileExtension(Txt),
169
170 /// The audio is encoded.
171 ///
172 /// This MIME type maybe identifies the format. Fallback to `Unknown` handling if the file extension
173 /// is unknown or the file header does not match.
174 MimeType(Txt),
175
176 /// The image is encoded.
177 ///
178 /// A decoder will be selected using the "magic number" at the start of the bytes buffer.
179 Unknown,
180}
181impl From<Txt> for AudioDataFormat {
182 fn from(ext_or_mime: Txt) -> Self {
183 if ext_or_mime.contains('/') {
184 AudioDataFormat::MimeType(ext_or_mime)
185 } else {
186 AudioDataFormat::FileExtension(ext_or_mime)
187 }
188 }
189}
190impl From<&str> for AudioDataFormat {
191 fn from(ext_or_mime: &str) -> Self {
192 Txt::from_str(ext_or_mime).into()
193 }
194}
195
196/// Represents an audio codec capability.
197///
198/// This type will be used in the next breaking release of the view API.
199#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
200#[non_exhaustive]
201pub struct AudioFormat {
202 /// Display name of the format.
203 pub display_name: Txt,
204
205 /// Media types (MIME) associated with the format.
206 ///
207 /// Lowercase, without `"audio/"` prefix, comma separated if there is more than one.
208 pub media_type_suffixes: Txt,
209
210 /// Common file extensions associated with the format.
211 ///
212 /// Lowercase, without dot, comma separated if there is more than one.
213 pub file_extensions: Txt,
214
215 /// Capabilities of this format.
216 pub capabilities: AudioFormatCapability,
217}
218impl AudioFormat {
219 /// From static str.
220 ///
221 /// # Panics
222 ///
223 /// Panics if `media_type_suffixes` not ASCII.
224 pub const fn from_static(
225 display_name: &'static str,
226 media_type_suffixes: &'static str,
227 file_extensions: &'static str,
228 capabilities: AudioFormatCapability,
229 ) -> Self {
230 assert!(media_type_suffixes.is_ascii());
231 Self {
232 display_name: Txt::from_static(display_name),
233 media_type_suffixes: Txt::from_static(media_type_suffixes),
234 file_extensions: Txt::from_static(file_extensions),
235 capabilities,
236 }
237 }
238
239 /// Iterate over media type suffixes.
240 pub fn media_type_suffixes_iter(&self) -> impl Iterator<Item = &str> {
241 self.media_type_suffixes.split(',').map(|e| e.trim())
242 }
243
244 /// Iterate over full media types, with `"image/"` prefix.
245 pub fn media_types(&self) -> impl Iterator<Item = Txt> {
246 self.media_type_suffixes_iter().map(Txt::from_str)
247 }
248
249 /// Iterate over extensions.
250 pub fn file_extensions_iter(&self) -> impl Iterator<Item = &str> {
251 self.file_extensions.split(',').map(|e| e.trim())
252 }
253
254 /// Checks if `f` matches any of the mime types or any of the file extensions.
255 ///
256 /// File extensions comparison ignores dot and ASCII case.
257 pub fn matches(&self, f: &str) -> bool {
258 let f = f.strip_prefix('.').unwrap_or(f);
259 let f = f.strip_prefix("audio/").unwrap_or(f);
260 self.media_type_suffixes_iter().any(|e| e.eq_ignore_ascii_case(f)) || self.file_extensions_iter().any(|e| e.eq_ignore_ascii_case(f))
261 }
262}
263
264bitflags! {
265 /// Capabilities of an [`AudioFormat`] implementation.
266 ///
267 /// Note that `DECODE` capability is omitted because the view-process can always decode formats.
268 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
269 pub struct AudioFormatCapability: u8 {
270 /// View-process can encode audio in this format.
271 const ENCODE = 0b_0000_0001;
272 }
273}
274
275/// Represent a image encode request.
276#[derive(Debug, Clone, Serialize, Deserialize)]
277#[non_exhaustive]
278pub struct AudioEncodeRequest {
279 /// Audio to encode.
280 pub id: AudioId,
281
282 /// Format query, view-process uses [`AudioFormat::matches`] to find the format.
283 pub format: Txt,
284
285 /// The audio to encode.
286 pub mix: AudioMix,
287}
288impl AudioEncodeRequest {
289 /// New.
290 pub fn new(id: AudioId, format: Txt, mix: AudioMix) -> Self {
291 Self { id, format, mix }
292 }
293}
294
295/// Represents decoded header metadata about an audio track.
296#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
297#[non_exhaustive]
298pub struct AudioMetadata {
299 /// Audio ID.
300 pub id: AudioId,
301
302 /// Number of channels interleaved in the track.
303 pub channel_count: u16,
304 /// Samples per second.
305 ///
306 /// A sample is a single sequence of `channel_count`.
307 pub sample_rate: u32,
308 /// Total duration of the track, if it is known.
309 pub total_duration: Option<Duration>,
310
311 /// Track is an entry (or subtree) of this other track.
312 pub parent: Option<AudioTrackMetadata>,
313
314 /// Custom metadata.
315 pub extensions: Vec<(ApiExtensionId, ApiExtensionPayload)>,
316}
317impl AudioMetadata {
318 /// New.
319 pub fn new(id: AudioId, channel_count: u16, sample_rate: u32) -> Self {
320 Self {
321 id,
322 channel_count,
323 sample_rate,
324 total_duration: None,
325 parent: None,
326 extensions: vec![],
327 }
328 }
329}
330/// Invalid initial value.
331impl Default for AudioMetadata {
332 fn default() -> Self {
333 Self {
334 id: AudioId::INVALID,
335 channel_count: Default::default(),
336 sample_rate: Default::default(),
337 total_duration: Default::default(),
338 parent: Default::default(),
339 extensions: vec![],
340 }
341 }
342}
343
344/// Represents decoded header metadata about a track position in the container represented by another audio.
345#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
346#[non_exhaustive]
347pub struct AudioTrackMetadata {
348 /// Image this one belongs too.
349 ///
350 /// The view-process always sends the parent image metadata first, so this id should be known by the app-process.
351 pub parent: AudioId,
352 /// Sort index of the track in the list of tracks.
353 pub index: usize,
354}
355impl AudioTrackMetadata {
356 /// New.
357 pub fn new(parent: AudioId, index: usize) -> Self {
358 Self { parent, index }
359 }
360}
361
362/// Represents a partial or fully decoded audio.
363///
364/// See [`Event::AudioDecoded`] for more details.
365///
366/// [`Event::AudioDecoded`]: crate::Event::AudioDecoded
367#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
368#[non_exhaustive]
369pub struct AudioDecoded {
370 /// The audio track ID.
371 ///
372 /// An [`AudioMetadata`] for this ID was already notified before this event.
373 pub id: AudioId,
374
375 /// Offset of the `chunk` on the track.
376 ///
377 /// This is a count in samples before the first in this chunk, a sample is a sequence of [`channel_count`].
378 ///
379 /// To convert offset to bytes `offset * channel_count * size_of::<f32>()`.
380 ///
381 /// [`channel_count`]: AudioMetadata::channel_count
382 pub offset: usize,
383
384 /// Interleaved `f32` samples.
385 pub chunk: IpcBytesCast<f32>,
386
387 /// If the `chunk` is actually the full decoded audio.
388 ///
389 /// When this is `true` no more decode events for the `id` are send, (re)playing the audio
390 /// will read directly from the cache.
391 ///
392 /// When this is `false` the `chunk` represent the last decoded chunk on demand because the audio is playing.
393 /// Depending on the request the audio may never be fully cached, always decoding again on replay.
394 pub is_full: bool,
395}
396impl AudioDecoded {
397 /// New.
398 pub fn new(id: AudioId, chunk: IpcBytesCast<f32>) -> Self {
399 Self {
400 id,
401 offset: 0,
402 chunk,
403 is_full: false,
404 }
405 }
406}
407/// Invalid initial value.
408impl Default for AudioDecoded {
409 fn default() -> Self {
410 Self {
411 id: AudioId::INVALID,
412 offset: Default::default(),
413 chunk: Default::default(),
414 is_full: Default::default(),
415 }
416 }
417}
418
419/// Represents a connection request to an audio output device.
420#[derive(Debug, Clone, Serialize, Deserialize)]
421#[non_exhaustive]
422pub struct AudioOutputRequest {
423 /// ID that will identify the new output.
424 pub id: AudioOutputId,
425
426 /// Initial config.
427 pub config: AudioOutputConfig,
428}
429impl AudioOutputRequest {
430 /// New.
431 pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
432 Self { id, config }
433 }
434}
435
436/// Represents an audio playback update request.
437#[derive(Debug, Clone, Serialize, Deserialize)]
438#[non_exhaustive]
439pub struct AudioOutputUpdateRequest {
440 /// The output stream.
441 pub id: AudioOutputId,
442 /// New config.
443 pub config: AudioOutputConfig,
444}
445impl AudioOutputUpdateRequest {
446 /// New.
447 pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
448 Self { id, config }
449 }
450}
451
452/// Represents an audio output stream capabilities.
453///
454/// Any audio played on this output is automatically converted to the channel count and sample rate.
455#[derive(Debug, Clone, Serialize, Deserialize)]
456#[non_exhaustive]
457pub struct AudioOutputOpenData {
458 /// Number of channels interleaved supported by this output.
459 pub channel_count: u16,
460 /// Samples per second.
461 ///
462 /// A sample is a single sequence of `channel_count`.
463 pub sample_rate: u32,
464}
465impl AudioOutputOpenData {
466 /// New.
467 pub fn new(channel_count: u16, sample_rate: u32) -> Self {
468 Self {
469 channel_count,
470 sample_rate,
471 }
472 }
473}
474
475/// Audio playback config.
476#[derive(Debug, Clone, Serialize, Deserialize)]
477#[non_exhaustive]
478pub struct AudioOutputConfig {
479 /// Playback state.
480 pub state: AudioOutputState,
481
482 /// Volume of the sound.
483 ///
484 /// The value multiplies the samples, `1.fct()` is the *natural* volume from the source.
485 pub volume: Factor,
486
487 /// Speed of the sound.
488 ///
489 /// This is a multiplier of the playback speed and pitch.
490 ///
491 /// * `0.5.fct()` doubles the total duration and halves (lowers) the pitch.
492 /// * `2.fct()` halves the total duration and doubles (raises) the pitch.
493 pub speed: Factor,
494}
495impl AudioOutputConfig {
496 /// New.
497 pub fn new(state: AudioOutputState, volume: Factor, speed: Factor) -> Self {
498 Self { state, volume, speed }
499 }
500}
501
502/// Represents the playback state if an audio output stream.
503#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
504#[non_exhaustive]
505pub enum AudioOutputState {
506 /// Audio is sent to the device for playback as in the sequence they are cued.
507 Playing,
508 /// Audio playback is paused, cue requests are buffered.
509 Paused,
510 /// Audio playback is paused, all current cue requests are dropped.
511 Stopped,
512}
513impl AudioOutputState {
514 /// If is [`Playing`].
515 ///
516 /// [`Playing`]: Self::Playing
517 pub fn is_playing(&self) -> bool {
518 matches!(self, Self::Playing)
519 }
520
521 /// If is [`Paused`].
522 ///
523 /// [`Paused`]: Self::Paused
524 pub fn is_paused(&self) -> bool {
525 matches!(self, Self::Paused)
526 }
527
528 /// If is [`Stopped`].
529 ///
530 /// [`Stopped`]: Self::Stopped
531 pub fn is_stopped(&self) -> bool {
532 matches!(self, Self::Stopped)
533 }
534}
535
536/// Represents an audio playback request.
537#[derive(Debug, Clone, Serialize, Deserialize)]
538#[non_exhaustive]
539pub struct AudioPlayRequest {
540 /// The audio output stream.
541 ///
542 /// If another audio is already playing this request is appended to the end.
543 pub output: AudioOutputId,
544
545 /// The audio.
546 pub mix: AudioMix,
547}
548impl AudioPlayRequest {
549 /// New.
550 pub fn new(output: AudioOutputId, mix: AudioMix) -> Self {
551 Self { output, mix }
552 }
553}
554
555/// Represents an audio source.
556#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
557#[non_exhaustive]
558pub struct AudioMix {
559 /// Silent start padding.
560 pub delay: Duration,
561 /// Total duration.
562 ///
563 /// If not set audio plays until the last layer. If set audio plays for the duration, if layers end before the duration
564 /// plays silent, if layers exceed the duration the end is clipped.
565 pub total_duration: Option<Duration>,
566
567 /// Components of this mix.
568 ///
569 /// Each layer applies to the previous.
570 pub layers: Vec<AudioMixLayer>,
571}
572impl AudioMix {
573 /// New empty.
574 pub fn new() -> Self {
575 Self {
576 delay: Duration::ZERO,
577 total_duration: None,
578 layers: vec![],
579 }
580 }
581}
582impl Default for AudioMix {
583 fn default() -> Self {
584 Self::new()
585 }
586}
587
588/// Represents an audio source component.
589#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
590#[non_exhaustive]
591pub enum AudioMixLayer {
592 /// Play the cached audio.
593 ///
594 /// The audio samples are adapted to the output format and added to the under layers result.
595 Audio {
596 /// The audio.
597 audio: AudioId,
598 /// Clip the start of the audio.
599 ///
600 /// Set to [`Duration::ZERO`] to play from the start.
601 skip: Duration,
602 /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
603 ///
604 /// Set to [`Duration::MAX`] to play to the end.
605 take: Duration,
606 },
607 /// Play the mix.
608 ///
609 /// This mix is sampled as an audio (computed), its effect layers do not affect the parent mix.
610 AudioMix {
611 /// The inner mix.
612 mix: AudioMix,
613 /// Clip the start of the audio.
614 ///
615 /// Set to [`Duration::ZERO`] to play from the start.
616 skip: Duration,
617 /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
618 ///
619 /// Set to [`Duration::MAX`] to play to the end.
620 take: Duration,
621 },
622
623 /// Linear volume transition.
624 ///
625 /// When the playback is in range the volume is multiplied by the linear interpolation between `start_volume` and `end_volume`. The volume snaps
626 /// back to the output stream volume after the end, unless another volume control layer is in effect.
627 VolumeLinear {
628 /// Start time.
629 start: Duration,
630 /// Transition duration.
631 ///
632 /// The effect ends at `start + duration` time.
633 duration: Duration,
634
635 /// Volume at the start.
636 start_volume: Factor,
637 /// Volume at the end,
638 end_volume: Factor,
639 },
640
641 /// Generate a sine wave sound.
642 SineWave {
643 /// Sine frequency.
644 frequency: f32,
645 /// Duration of the sample.
646 duration: Duration,
647 },
648}
649
650bitflags! {
651 /// Defines what tracks are decoded from multi track containers.
652 #[derive(Copy, Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)]
653 pub struct AudioTracksMode: u8 {
654 /// Decodes all tracks.
655 const TRACKS = 0b0001;
656 /// Decodes only the first track, or the track explicitly marked as primary/default by the container format.
657 ///
658 /// Note that this is 0, empty.
659 const PRIMARY = 0;
660 }
661}