zng_view_api/audio.rs
1//! Audio device types.
2
3use std::{num::NonZeroU16, time::Duration};
4
5use bitflags::bitflags;
6use serde::{Deserialize, Serialize};
7use zng_task::channel::IpcBytesCast;
8use zng_txt::Txt;
9use zng_unit::Factor;
10
11crate::declare_id! {
12 /// Audio device ID.
13 ///
14 /// In the View Process this is mapped to a system id.
15 ///
16 /// In the App Process this is mapped to an unique id, but does not survived View crashes.
17 ///
18 /// The View Process defines the ID.
19 pub struct AudioDeviceId(_);
20
21 /// Id of a decoded or on demand decoding audio track in the cache.
22 ///
23 /// The View Process defines the ID.
24 pub struct AudioId(_);
25
26 /// Audio playback stream ID.
27 ///
28 /// In the View Process this is mapped to a system id.
29 ///
30 /// In the App Process this is an unique id that survives View crashes.
31 ///
32 /// The App Process defines the ID.
33 pub struct AudioOutputId(_);
34
35 /// Audio playback request ID.
36 ///
37 /// The View Process defines the ID.
38 pub struct AudioPlayId(_);
39
40 /// Id of an audio encode task.
41 ///
42 /// The View Process defines the ID.
43 pub struct AudioEncodeId(_);
44}
45
46/// Info about an input or output device.
47#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
48#[non_exhaustive]
49pub struct AudioDeviceInfo {
50 /// Device display name.
51 pub name: Txt,
52 /// Device input/output capabilities.
53 pub capabilities: AudioDeviceCapability,
54 /// Input stream modes this device can produce.
55 pub input_modes: Vec<AudioStreamMode>,
56 /// Output stream modes this device can consume.
57 pub output_modes: Vec<AudioStreamMode>,
58}
59
60bitflags! {
61 /// Represents audio device input/output capabilities.
62 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
63 pub struct AudioDeviceCapability: u8 {
64 /// Device can generate audio streams.
65 const INPUT = 0b01;
66 /// Device can consume audio streams.
67 const OUTPUT = 0b11;
68 }
69}
70
71/// Represents steam capability of an audio device.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
73#[non_exhaustive]
74pub struct AudioStreamMode {
75 /// Number of audio channels.
76 pub channels: NonZeroU16,
77 /// Minimum and maximum sample rate.
78 pub sample_rate: SampleRate,
79 /// Minimum and maximum supported buffer size.
80 pub buffer_size: BufferSize,
81}
82
83/// Represents the minimum and maximum sample rate per audio channel.
84///
85/// Values are in samples processed per second.
86#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
87pub struct SampleRate {
88 /// Minimum, inclusive.
89 pub min: u32,
90 /// Maximum, inclusive.
91 pub max: u32,
92}
93
94/// Represents the minimum and maximum supported buffer size for the device.
95#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
96#[non_exhaustive]
97pub enum BufferSize {
98 /// Range in frames per second.
99 Range {
100 /// Minimum, inclusive.
101 min: u32,
102 /// Maximum, inclusive.
103 max: u32,
104 },
105 /// Platform cannot describe buffer size for this device.
106 Unknown,
107}
108
109/// Represent an audio load/decode request.
110#[derive(Debug, Clone, Serialize, Deserialize)]
111#[non_exhaustive]
112pub struct AudioRequest<D> {
113 /// Audio data format.
114 pub format: AudioDataFormat,
115
116 /// Audio data.
117 pub data: D,
118
119 /// Maximum allowed decoded size in bytes.
120 ///
121 /// View-process will avoid decoding and return an error if the track would exceed this limit.
122 pub max_decoded_len: u64,
123
124 /// Defines what tracks are decoded from multi image containers.
125 pub tracks: AudioTracksMode,
126
127 /// Audio is a track (or subtree) of this other audio.
128 ///
129 /// This value is now used by the view-process, it is just returned with the metadata. This is useful when
130 /// an already decoded image is requested after a respawn to maintain the original container structure.
131 pub parent: Option<AudioTrackMetadata>,
132}
133impl<D> AudioRequest<D> {
134 /// New.
135 pub fn new(format: AudioDataFormat, data: D, max_decoded_len: u64) -> Self {
136 Self {
137 format,
138 data,
139 max_decoded_len,
140 tracks: AudioTracksMode::PRIMARY,
141 parent: None,
142 }
143 }
144}
145
146/// Format of the audio bytes.
147#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
148#[non_exhaustive]
149pub enum AudioDataFormat {
150 /// Data is already decoded stream of interleaved `f32` samples.
151 InterleavedF32 {
152 /// Number of channels interleaved in the track.
153 channel_count: u16,
154 /// Samples per second.
155 ///
156 /// A sample is a single sequence of `channel_count`.
157 sample_rate: u32,
158 /// Total duration of the track, if it is known.
159 total_duration: Option<Duration>,
160 },
161
162 /// The audio is encoded.
163 ///
164 /// This file extension maybe identifies the format. Fallback to `Unknown` handling if the file extension
165 /// is unknown or the file header does not match.
166 FileExtension(Txt),
167
168 /// The audio is encoded.
169 ///
170 /// This MIME type maybe identifies the format. Fallback to `Unknown` handling if the file extension
171 /// is unknown or the file header does not match.
172 MimeType(Txt),
173
174 /// The image is encoded.
175 ///
176 /// A decoder will be selected using the "magic number" at the start of the bytes buffer.
177 Unknown,
178}
179impl From<Txt> for AudioDataFormat {
180 fn from(ext_or_mime: Txt) -> Self {
181 if ext_or_mime.contains('/') {
182 AudioDataFormat::MimeType(ext_or_mime)
183 } else {
184 AudioDataFormat::FileExtension(ext_or_mime)
185 }
186 }
187}
188impl From<&str> for AudioDataFormat {
189 fn from(ext_or_mime: &str) -> Self {
190 Txt::from_str(ext_or_mime).into()
191 }
192}
193
194/// Represents an audio codec capability.
195///
196/// This type will be used in the next breaking release of the view API.
197#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
198#[non_exhaustive]
199pub struct AudioFormat {
200 /// Display name of the format.
201 pub display_name: Txt,
202
203 /// Media types (MIME) associated with the format.
204 ///
205 /// Lowercase, without `"audio/"` prefix, comma separated if there is more than one.
206 pub media_type_suffixes: Txt,
207
208 /// Common file extensions associated with the format.
209 ///
210 /// Lowercase, without dot, comma separated if there is more than one.
211 pub file_extensions: Txt,
212
213 /// Capabilities of this format.
214 pub capabilities: AudioFormatCapability,
215}
216impl AudioFormat {
217 /// From static str.
218 ///
219 /// # Panics
220 ///
221 /// Panics if `media_type_suffixes` not ASCII.
222 pub const fn from_static(
223 display_name: &'static str,
224 media_type_suffixes: &'static str,
225 file_extensions: &'static str,
226 capabilities: AudioFormatCapability,
227 ) -> Self {
228 assert!(media_type_suffixes.is_ascii());
229 Self {
230 display_name: Txt::from_static(display_name),
231 media_type_suffixes: Txt::from_static(media_type_suffixes),
232 file_extensions: Txt::from_static(file_extensions),
233 capabilities,
234 }
235 }
236
237 /// Iterate over media type suffixes.
238 pub fn media_type_suffixes_iter(&self) -> impl Iterator<Item = &str> {
239 self.media_type_suffixes.split(',').map(|e| e.trim())
240 }
241
242 /// Iterate over full media types, with `"image/"` prefix.
243 pub fn media_types(&self) -> impl Iterator<Item = Txt> {
244 self.media_type_suffixes_iter().map(Txt::from_str)
245 }
246
247 /// Iterate over extensions.
248 pub fn file_extensions_iter(&self) -> impl Iterator<Item = &str> {
249 self.file_extensions.split(',').map(|e| e.trim())
250 }
251
252 /// Checks if `f` matches any of the mime types or any of the file extensions.
253 ///
254 /// File extensions comparison ignores dot and ASCII case.
255 pub fn matches(&self, f: &str) -> bool {
256 let f = f.strip_prefix('.').unwrap_or(f);
257 let f = f.strip_prefix("audio/").unwrap_or(f);
258 self.media_type_suffixes_iter().any(|e| e.eq_ignore_ascii_case(f)) || self.file_extensions_iter().any(|e| e.eq_ignore_ascii_case(f))
259 }
260}
261
262bitflags! {
263 /// Capabilities of an [`AudioFormat`] implementation.
264 ///
265 /// Note that `DECODE` capability is omitted because the view-process can always decode formats.
266 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
267 pub struct AudioFormatCapability: u8 {
268 /// View-process can encode audio in this format.
269 const ENCODE = 0b_0000_0001;
270 }
271}
272
273/// Represent a image encode request.
274#[derive(Debug, Clone, Serialize, Deserialize)]
275#[non_exhaustive]
276pub struct AudioEncodeRequest {
277 /// Audio to encode.
278 pub id: AudioId,
279
280 /// Format query, view-process uses [`AudioFormat::matches`] to find the format.
281 pub format: Txt,
282
283 /// The audio to encode.
284 pub mix: AudioMix,
285}
286impl AudioEncodeRequest {
287 /// New.
288 pub fn new(id: AudioId, format: Txt, mix: AudioMix) -> Self {
289 Self { id, format, mix }
290 }
291}
292
293/// Represents decoded header metadata about an audio track.
294#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
295#[non_exhaustive]
296pub struct AudioMetadata {
297 /// Audio ID.
298 pub id: AudioId,
299
300 /// Number of channels interleaved in the track.
301 pub channel_count: u16,
302 /// Samples per second.
303 ///
304 /// A sample is a single sequence of `channel_count`.
305 pub sample_rate: u32,
306 /// Total duration of the track, if it is known.
307 pub total_duration: Option<Duration>,
308
309 /// Track is an entry (or subtree) of this other track.
310 pub parent: Option<AudioTrackMetadata>,
311}
312impl AudioMetadata {
313 /// New.
314 pub fn new(id: AudioId, channel_count: u16, sample_rate: u32) -> Self {
315 Self {
316 id,
317 channel_count,
318 sample_rate,
319 total_duration: None,
320 parent: None,
321 }
322 }
323}
324/// Invalid initial value.
325impl Default for AudioMetadata {
326 fn default() -> Self {
327 Self {
328 id: AudioId::INVALID,
329 channel_count: Default::default(),
330 sample_rate: Default::default(),
331 total_duration: Default::default(),
332 parent: Default::default(),
333 }
334 }
335}
336
337/// Represents decoded header metadata about a track position in the container represented by another audio.
338#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
339#[non_exhaustive]
340pub struct AudioTrackMetadata {
341 /// Image this one belongs too.
342 ///
343 /// The view-process always sends the parent image metadata first, so this id should be known by the app-process.
344 pub parent: AudioId,
345 /// Sort index of the track in the list of tracks.
346 pub index: usize,
347}
348impl AudioTrackMetadata {
349 /// New.
350 pub fn new(parent: AudioId, index: usize) -> Self {
351 Self { parent, index }
352 }
353}
354
355/// Represents a partial or fully decoded audio.
356///
357/// See [`Event::AudioDecoded`] for more details.
358///
359/// [`Event::AudioDecoded`]: crate::Event::AudioDecoded
360#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
361#[non_exhaustive]
362pub struct AudioDecoded {
363 /// The audio track ID.
364 ///
365 /// An [`AudioMetadata`] for this ID was already notified before this event.
366 pub id: AudioId,
367
368 /// Offset of the `chunk` on the track.
369 ///
370 /// This is a count in samples before the first in this chunk, a sample is a sequence of [`channel_count`].
371 ///
372 /// To convert offset to bytes `offset * channel_count * size_of::<f32>()`.
373 ///
374 /// [`channel_count`]: AudioMetadata::channel_count
375 pub offset: usize,
376
377 /// Interleaved `f32` samples.
378 pub chunk: IpcBytesCast<f32>,
379
380 /// If the `chunk` is actually the full decoded audio.
381 ///
382 /// When this is `true` no more decode events for the `id` are send, (re)playing the audio
383 /// will read directly from the cache.
384 ///
385 /// When this is `false` the `chunk` represent the last decoded chunk on demand because the audio is playing.
386 /// Depending on the request the audio may never be fully cached, always decoding again on replay.
387 pub is_full: bool,
388}
389impl AudioDecoded {
390 /// New.
391 pub fn new(id: AudioId, chunk: IpcBytesCast<f32>) -> Self {
392 Self {
393 id,
394 offset: 0,
395 chunk,
396 is_full: false,
397 }
398 }
399}
400/// Invalid initial value.
401impl Default for AudioDecoded {
402 fn default() -> Self {
403 Self {
404 id: AudioId::INVALID,
405 offset: Default::default(),
406 chunk: Default::default(),
407 is_full: Default::default(),
408 }
409 }
410}
411
412/// Represents a connection request to an audio output device.
413#[derive(Debug, Clone, Serialize, Deserialize)]
414#[non_exhaustive]
415pub struct AudioOutputRequest {
416 /// ID that will identify the new output.
417 pub id: AudioOutputId,
418
419 /// Initial config.
420 pub config: AudioOutputConfig,
421}
422impl AudioOutputRequest {
423 /// New.
424 pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
425 Self { id, config }
426 }
427}
428
429/// Represents an audio playback update request.
430#[derive(Debug, Clone, Serialize, Deserialize)]
431#[non_exhaustive]
432pub struct AudioOutputUpdateRequest {
433 /// The output stream.
434 pub id: AudioOutputId,
435 /// New config.
436 pub config: AudioOutputConfig,
437}
438impl AudioOutputUpdateRequest {
439 /// New.
440 pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
441 Self { id, config }
442 }
443}
444
445/// Represents an audio output stream capabilities.
446///
447/// Any audio played on this output is automatically converted to the channel count and sample rate.
448#[derive(Debug, Clone, Serialize, Deserialize)]
449#[non_exhaustive]
450pub struct AudioOutputOpenData {
451 /// Number of channels interleaved supported by this output.
452 pub channel_count: u16,
453 /// Samples per second.
454 ///
455 /// A sample is a single sequence of `channel_count`.
456 pub sample_rate: u32,
457}
458impl AudioOutputOpenData {
459 /// New.
460 pub fn new(channel_count: u16, sample_rate: u32) -> Self {
461 Self {
462 channel_count,
463 sample_rate,
464 }
465 }
466}
467
468/// Audio playback config.
469#[derive(Debug, Clone, Serialize, Deserialize)]
470#[non_exhaustive]
471pub struct AudioOutputConfig {
472 /// Playback state.
473 pub state: AudioOutputState,
474
475 /// Volume of the sound.
476 ///
477 /// The value multiplies the samples, `1.fct()` is the *natural* volume from the source.
478 pub volume: Factor,
479
480 /// Speed of the sound.
481 ///
482 /// This is a multiplier of the playback speed and pitch.
483 ///
484 /// * `0.5.fct()` doubles the total duration and halves (lowers) the pitch.
485 /// * `2.fct()` halves the total duration and doubles (raises) the pitch.
486 pub speed: Factor,
487}
488impl AudioOutputConfig {
489 /// New.
490 pub fn new(state: AudioOutputState, volume: Factor, speed: Factor) -> Self {
491 Self { state, volume, speed }
492 }
493}
494
495/// Represents the playback state if an audio output stream.
496#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
497#[non_exhaustive]
498pub enum AudioOutputState {
499 /// Audio is sent to the device for playback as in the sequence they are cued.
500 Playing,
501 /// Audio playback is paused, cue requests are buffered.
502 Paused,
503 /// Audio playback is paused, all current cue requests are dropped.
504 Stopped,
505}
506impl AudioOutputState {
507 /// If is [`Playing`].
508 ///
509 /// [`Playing`]: Self::Playing
510 pub fn is_playing(&self) -> bool {
511 matches!(self, Self::Playing)
512 }
513
514 /// If is [`Paused`].
515 ///
516 /// [`Paused`]: Self::Paused
517 pub fn is_paused(&self) -> bool {
518 matches!(self, Self::Paused)
519 }
520
521 /// If is [`Stopped`].
522 ///
523 /// [`Stopped`]: Self::Stopped
524 pub fn is_stopped(&self) -> bool {
525 matches!(self, Self::Stopped)
526 }
527}
528
529/// Represents an audio playback request.
530#[derive(Debug, Clone, Serialize, Deserialize)]
531#[non_exhaustive]
532pub struct AudioPlayRequest {
533 /// The audio output stream.
534 ///
535 /// If another audio is already playing this request is appended to the end.
536 pub output: AudioOutputId,
537
538 /// The audio.
539 pub mix: AudioMix,
540}
541impl AudioPlayRequest {
542 /// New.
543 pub fn new(output: AudioOutputId, mix: AudioMix) -> Self {
544 Self { output, mix }
545 }
546}
547
548/// Represents an audio source.
549#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
550#[non_exhaustive]
551pub struct AudioMix {
552 /// Silent start padding.
553 pub delay: Duration,
554 /// Total duration.
555 ///
556 /// If not set audio plays until the last layer. If set audio plays for the duration, if layers end before the duration
557 /// plays silent, if layers exceed the duration the end is clipped.
558 pub total_duration: Option<Duration>,
559
560 /// Components of this mix.
561 ///
562 /// Each layer applies to the previous.
563 pub layers: Vec<AudioMixLayer>,
564}
565impl AudioMix {
566 /// New empty.
567 pub fn new() -> Self {
568 Self {
569 delay: Duration::ZERO,
570 total_duration: None,
571 layers: vec![],
572 }
573 }
574}
575impl Default for AudioMix {
576 fn default() -> Self {
577 Self::new()
578 }
579}
580
581/// Represents an audio source component.
582#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
583#[non_exhaustive]
584pub enum AudioMixLayer {
585 /// Play the cached audio.
586 ///
587 /// The audio samples are adapted to the output format and added to the under layers result.
588 Audio {
589 /// The audio.
590 audio: AudioId,
591 /// Clip the start of the audio.
592 ///
593 /// Set to [`Duration::ZERO`] to play from the start.
594 skip: Duration,
595 /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
596 ///
597 /// Set to [`Duration::MAX`] to play to the end.
598 take: Duration,
599 },
600 /// Play the mix.
601 ///
602 /// This mix is sampled as an audio (computed), its effect layers do not affect the parent mix.
603 AudioMix {
604 /// The inner mix.
605 mix: AudioMix,
606 /// Clip the start of the audio.
607 ///
608 /// Set to [`Duration::ZERO`] to play from the start.
609 skip: Duration,
610 /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
611 ///
612 /// Set to [`Duration::MAX`] to play to the end.
613 take: Duration,
614 },
615
616 /// Linear volume transition.
617 ///
618 /// When the playback is in range the volume is multiplied by the linear interpolation between `start_volume` and `end_volume`. The volume snaps
619 /// back to the output stream volume after the end, unless another volume control layer is in effect.
620 VolumeLinear {
621 /// Start time.
622 start: Duration,
623 /// Transition duration.
624 ///
625 /// The effect ends at `start + duration` time.
626 duration: Duration,
627
628 /// Volume at the start.
629 start_volume: Factor,
630 /// Volume at the end,
631 end_volume: Factor,
632 },
633
634 /// Generate a sine wave sound.
635 SineWave {
636 /// Sine frequency.
637 frequency: f32,
638 /// Duration of the sample.
639 duration: Duration,
640 },
641}
642
643bitflags! {
644 /// Defines what tracks are decoded from multi track containers.
645 #[derive(Copy, Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)]
646 pub struct AudioTracksMode: u8 {
647 /// Decodes all tracks.
648 const TRACKS = 0b0001;
649 /// Decodes only the first track, or the track explicitly marked as primary/default by the container format.
650 ///
651 /// Note that this is 0, empty.
652 const PRIMARY = 0;
653 }
654}