zng_view_api/
audio.rs

1//! Audio device types.
2
3use std::{num::NonZeroU16, time::Duration};
4
5use bitflags::bitflags;
6use serde::{Deserialize, Serialize};
7use zng_task::channel::IpcBytesCast;
8use zng_txt::Txt;
9use zng_unit::Factor;
10
11use crate::api_extension::{ApiExtensionId, ApiExtensionPayload};
12
crate::declare_id! {
    /// Audio device ID.
    ///
    /// In the View Process this is mapped to a system id.
    ///
    /// In the App Process this is mapped to a unique id, but it does not survive View crashes.
    ///
    /// The View Process defines the ID.
    pub struct AudioDeviceId(_);

    /// Id of a decoded or on demand decoding audio track in the cache.
    ///
    /// The View Process defines the ID.
    pub struct AudioId(_);

    /// Audio playback stream ID.
    ///
    /// In the View Process this is mapped to a system id.
    ///
    /// In the App Process this is a unique id that survives View crashes.
    ///
    /// The App Process defines the ID.
    pub struct AudioOutputId(_);

    /// Audio playback request ID.
    ///
    /// The View Process defines the ID.
    pub struct AudioPlayId(_);

    /// Id of an audio encode task.
    ///
    /// The View Process defines the ID.
    pub struct AudioEncodeId(_);
}
47
/// Info about an input or output device.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub struct AudioDeviceInfo {
    /// Device display name.
    pub name: Txt,
    /// Device input/output capabilities.
    pub capabilities: AudioDeviceCapability,
    /// Input stream modes this device can produce.
    pub input_modes: Vec<AudioStreamMode>,
    /// Output stream modes this device can consume.
    pub output_modes: Vec<AudioStreamMode>,
}
61
62bitflags! {
63    /// Represents audio device input/output capabilities.
64    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65    pub struct AudioDeviceCapability: u8 {
66        /// Device can generate audio streams.
67        const INPUT = 0b01;
68        /// Device can consume audio streams.
69        const OUTPUT = 0b11;
70    }
71}
72
/// Represents a stream capability of an audio device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub struct AudioStreamMode {
    /// Number of audio channels.
    pub channels: NonZeroU16,
    /// Minimum and maximum sample rate.
    pub sample_rate: SampleRate,
    /// Minimum and maximum supported buffer size.
    pub buffer_size: BufferSize,
}
84
/// Represents the minimum and maximum sample rate per audio channel.
///
/// Values are in samples processed per second, both bounds are inclusive.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SampleRate {
    /// Minimum, inclusive.
    pub min: u32,
    /// Maximum, inclusive.
    pub max: u32,
}
95
/// Represents the minimum and maximum supported buffer size for the device.
///
/// Is [`Unknown`] when the platform cannot describe the buffer size.
///
/// [`Unknown`]: Self::Unknown
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum BufferSize {
    /// Range in frames per second.
    Range {
        /// Minimum, inclusive.
        min: u32,
        /// Maximum, inclusive.
        max: u32,
    },
    /// Platform cannot describe buffer size for this device.
    Unknown,
}
110
111/// Represent an audio load/decode request.
112#[derive(Debug, Clone, Serialize, Deserialize)]
113#[non_exhaustive]
114pub struct AudioRequest<D> {
115    /// Audio data format.
116    pub format: AudioDataFormat,
117
118    /// Audio data.
119    pub data: D,
120
121    /// Maximum allowed decoded size in bytes.
122    ///
123    /// View-process will avoid decoding and return an error if the track would exceed this limit.
124    pub max_decoded_len: u64,
125
126    /// Defines what tracks are decoded from multi image containers.
127    pub tracks: AudioTracksMode,
128
129    /// Audio is a track (or subtree) of this other audio.
130    ///
131    /// This value is now used by the view-process, it is just returned with the metadata. This is useful when
132    /// an already decoded image is requested after a respawn to maintain the original container structure.
133    pub parent: Option<AudioTrackMetadata>,
134}
135impl<D> AudioRequest<D> {
136    /// New.
137    pub fn new(format: AudioDataFormat, data: D, max_decoded_len: u64) -> Self {
138        Self {
139            format,
140            data,
141            max_decoded_len,
142            tracks: AudioTracksMode::PRIMARY,
143            parent: None,
144        }
145    }
146}
147
148/// Format of the audio bytes.
149#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
150#[non_exhaustive]
151pub enum AudioDataFormat {
152    /// Data is already decoded stream of interleaved `f32` samples.
153    InterleavedF32 {
154        /// Number of channels interleaved in the track.
155        channel_count: u16,
156        /// Samples per second.
157        ///
158        /// A sample is a single sequence of `channel_count`.
159        sample_rate: u32,
160        /// Total duration of the track, if it is known.
161        total_duration: Option<Duration>,
162    },
163
164    /// The audio is encoded.
165    ///
166    /// This file extension maybe identifies the format. Fallback to `Unknown` handling if the file extension
167    /// is unknown or the file header does not match.
168    FileExtension(Txt),
169
170    /// The audio is encoded.
171    ///
172    /// This MIME type maybe identifies the format. Fallback to `Unknown` handling if the file extension
173    /// is unknown or the file header does not match.
174    MimeType(Txt),
175
176    /// The image is encoded.
177    ///
178    /// A decoder will be selected using the "magic number" at the start of the bytes buffer.
179    Unknown,
180}
181impl From<Txt> for AudioDataFormat {
182    fn from(ext_or_mime: Txt) -> Self {
183        if ext_or_mime.contains('/') {
184            AudioDataFormat::MimeType(ext_or_mime)
185        } else {
186            AudioDataFormat::FileExtension(ext_or_mime)
187        }
188    }
189}
190impl From<&str> for AudioDataFormat {
191    fn from(ext_or_mime: &str) -> Self {
192        Txt::from_str(ext_or_mime).into()
193    }
194}
195
/// Represents an audio codec capability.
///
/// This type will be used in the next breaking release of the view API.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub struct AudioFormat {
    /// Display name of the format.
    pub display_name: Txt,

    /// Media types (MIME) associated with the format.
    ///
    /// Lowercase, without `"audio/"` prefix, comma separated if there is more than one.
    pub media_type_suffixes: Txt,

    /// Common file extensions associated with the format.
    ///
    /// Lowercase, without dot, comma separated if there is more than one.
    pub file_extensions: Txt,

    /// Capabilities of this format.
    pub capabilities: AudioFormatCapability,
}
218impl AudioFormat {
219    /// From static str.
220    ///
221    /// # Panics
222    ///
223    /// Panics if `media_type_suffixes` not ASCII.
224    pub const fn from_static(
225        display_name: &'static str,
226        media_type_suffixes: &'static str,
227        file_extensions: &'static str,
228        capabilities: AudioFormatCapability,
229    ) -> Self {
230        assert!(media_type_suffixes.is_ascii());
231        Self {
232            display_name: Txt::from_static(display_name),
233            media_type_suffixes: Txt::from_static(media_type_suffixes),
234            file_extensions: Txt::from_static(file_extensions),
235            capabilities,
236        }
237    }
238
239    /// Iterate over media type suffixes.
240    pub fn media_type_suffixes_iter(&self) -> impl Iterator<Item = &str> {
241        self.media_type_suffixes.split(',').map(|e| e.trim())
242    }
243
244    /// Iterate over full media types, with `"image/"` prefix.
245    pub fn media_types(&self) -> impl Iterator<Item = Txt> {
246        self.media_type_suffixes_iter().map(Txt::from_str)
247    }
248
249    /// Iterate over extensions.
250    pub fn file_extensions_iter(&self) -> impl Iterator<Item = &str> {
251        self.file_extensions.split(',').map(|e| e.trim())
252    }
253
254    /// Checks if `f` matches any of the mime types or any of the file extensions.
255    ///
256    /// File extensions comparison ignores dot and ASCII case.
257    pub fn matches(&self, f: &str) -> bool {
258        let f = f.strip_prefix('.').unwrap_or(f);
259        let f = f.strip_prefix("audio/").unwrap_or(f);
260        self.media_type_suffixes_iter().any(|e| e.eq_ignore_ascii_case(f)) || self.file_extensions_iter().any(|e| e.eq_ignore_ascii_case(f))
261    }
262}
263
bitflags! {
    /// Capabilities of an [`AudioFormat`] implementation.
    ///
    /// Note that a `DECODE` capability is omitted because the view-process can always decode formats.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
    pub struct AudioFormatCapability: u8 {
        /// View-process can encode audio in this format.
        const ENCODE = 0b_0000_0001;
    }
}
274
275/// Represent a image encode request.
276#[derive(Debug, Clone, Serialize, Deserialize)]
277#[non_exhaustive]
278pub struct AudioEncodeRequest {
279    /// Audio to encode.
280    pub id: AudioId,
281
282    /// Format query, view-process uses [`AudioFormat::matches`] to find the format.
283    pub format: Txt,
284
285    /// The audio to encode.
286    pub mix: AudioMix,
287}
288impl AudioEncodeRequest {
289    /// New.
290    pub fn new(id: AudioId, format: Txt, mix: AudioMix) -> Self {
291        Self { id, format, mix }
292    }
293}
294
295/// Represents decoded header metadata about an audio track.
296#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
297#[non_exhaustive]
298pub struct AudioMetadata {
299    /// Audio ID.
300    pub id: AudioId,
301
302    /// Number of channels interleaved in the track.
303    pub channel_count: u16,
304    /// Samples per second.
305    ///
306    /// A sample is a single sequence of `channel_count`.
307    pub sample_rate: u32,
308    /// Total duration of the track, if it is known.
309    pub total_duration: Option<Duration>,
310
311    /// Track is an entry (or subtree) of this other track.
312    pub parent: Option<AudioTrackMetadata>,
313
314    /// Custom metadata.
315    pub extensions: Vec<(ApiExtensionId, ApiExtensionPayload)>,
316}
317impl AudioMetadata {
318    /// New.
319    pub fn new(id: AudioId, channel_count: u16, sample_rate: u32) -> Self {
320        Self {
321            id,
322            channel_count,
323            sample_rate,
324            total_duration: None,
325            parent: None,
326            extensions: vec![],
327        }
328    }
329}
330/// Invalid initial value.
331impl Default for AudioMetadata {
332    fn default() -> Self {
333        Self {
334            id: AudioId::INVALID,
335            channel_count: Default::default(),
336            sample_rate: Default::default(),
337            total_duration: Default::default(),
338            parent: Default::default(),
339            extensions: vec![],
340        }
341    }
342}
343
344/// Represents decoded header metadata about a track position in the container represented by another audio.
345#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
346#[non_exhaustive]
347pub struct AudioTrackMetadata {
348    /// Image this one belongs too.
349    ///
350    /// The view-process always sends the parent image metadata first, so this id should be known by the app-process.
351    pub parent: AudioId,
352    /// Sort index of the track in the list of tracks.
353    pub index: usize,
354}
355impl AudioTrackMetadata {
356    /// New.
357    pub fn new(parent: AudioId, index: usize) -> Self {
358        Self { parent, index }
359    }
360}
361
362/// Represents a partial or fully decoded audio.
363///
364/// See [`Event::AudioDecoded`] for more details.
365///
366/// [`Event::AudioDecoded`]: crate::Event::AudioDecoded
367#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
368#[non_exhaustive]
369pub struct AudioDecoded {
370    /// The audio track ID.
371    ///
372    /// An [`AudioMetadata`] for this ID was already notified before this event.
373    pub id: AudioId,
374
375    /// Offset of the `chunk` on the track.
376    ///
377    /// This is a count in samples before the first in this chunk, a sample is a sequence of [`channel_count`].
378    ///
379    /// To convert offset to bytes `offset * channel_count * size_of::<f32>()`.
380    ///
381    /// [`channel_count`]: AudioMetadata::channel_count
382    pub offset: usize,
383
384    /// Interleaved `f32` samples.
385    pub chunk: IpcBytesCast<f32>,
386
387    /// If the `chunk` is actually the full decoded audio.
388    ///
389    /// When this is `true` no more decode events for the `id` are send, (re)playing the audio
390    /// will read directly from the cache.
391    ///
392    /// When this is `false` the `chunk` represent the last decoded chunk on demand because the audio is playing.
393    /// Depending on the request the audio may never be fully cached, always decoding again on replay.
394    pub is_full: bool,
395}
396impl AudioDecoded {
397    /// New.
398    pub fn new(id: AudioId, chunk: IpcBytesCast<f32>) -> Self {
399        Self {
400            id,
401            offset: 0,
402            chunk,
403            is_full: false,
404        }
405    }
406}
407/// Invalid initial value.
408impl Default for AudioDecoded {
409    fn default() -> Self {
410        Self {
411            id: AudioId::INVALID,
412            offset: Default::default(),
413            chunk: Default::default(),
414            is_full: Default::default(),
415        }
416    }
417}
418
419/// Represents a connection request to an audio output device.
420#[derive(Debug, Clone, Serialize, Deserialize)]
421#[non_exhaustive]
422pub struct AudioOutputRequest {
423    /// ID that will identify the new output.
424    pub id: AudioOutputId,
425
426    /// Initial config.
427    pub config: AudioOutputConfig,
428}
429impl AudioOutputRequest {
430    /// New.
431    pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
432        Self { id, config }
433    }
434}
435
436/// Represents an audio playback update request.
437#[derive(Debug, Clone, Serialize, Deserialize)]
438#[non_exhaustive]
439pub struct AudioOutputUpdateRequest {
440    /// The output stream.
441    pub id: AudioOutputId,
442    /// New config.
443    pub config: AudioOutputConfig,
444}
445impl AudioOutputUpdateRequest {
446    /// New.
447    pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
448        Self { id, config }
449    }
450}
451
452/// Represents an audio output stream capabilities.
453///
454/// Any audio played on this output is automatically converted to the channel count and sample rate.
455#[derive(Debug, Clone, Serialize, Deserialize)]
456#[non_exhaustive]
457pub struct AudioOutputOpenData {
458    /// Number of channels interleaved supported by this output.
459    pub channel_count: u16,
460    /// Samples per second.
461    ///
462    /// A sample is a single sequence of `channel_count`.
463    pub sample_rate: u32,
464}
465impl AudioOutputOpenData {
466    /// New.
467    pub fn new(channel_count: u16, sample_rate: u32) -> Self {
468        Self {
469            channel_count,
470            sample_rate,
471        }
472    }
473}
474
475/// Audio playback config.
476#[derive(Debug, Clone, Serialize, Deserialize)]
477#[non_exhaustive]
478pub struct AudioOutputConfig {
479    /// Playback state.
480    pub state: AudioOutputState,
481
482    /// Volume of the sound.
483    ///
484    /// The value multiplies the samples, `1.fct()` is the *natural* volume from the source.
485    pub volume: Factor,
486
487    /// Speed of the sound.
488    ///
489    /// This is a multiplier of the playback speed and pitch.
490    ///
491    /// * `0.5.fct()` doubles the total duration and halves (lowers) the pitch.
492    /// * `2.fct()` halves the total duration and doubles (raises) the pitch.
493    pub speed: Factor,
494}
495impl AudioOutputConfig {
496    /// New.
497    pub fn new(state: AudioOutputState, volume: Factor, speed: Factor) -> Self {
498        Self { state, volume, speed }
499    }
500}
501
502/// Represents the playback state if an audio output stream.
503#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
504#[non_exhaustive]
505pub enum AudioOutputState {
506    /// Audio is sent to the device for playback as in the sequence they are cued.
507    Playing,
508    /// Audio playback is paused, cue requests are buffered.
509    Paused,
510    /// Audio playback is paused, all current cue requests are dropped.
511    Stopped,
512}
513impl AudioOutputState {
514    /// If is [`Playing`].
515    ///
516    /// [`Playing`]: Self::Playing
517    pub fn is_playing(&self) -> bool {
518        matches!(self, Self::Playing)
519    }
520
521    /// If is [`Paused`].
522    ///
523    /// [`Paused`]: Self::Paused
524    pub fn is_paused(&self) -> bool {
525        matches!(self, Self::Paused)
526    }
527
528    /// If is [`Stopped`].
529    ///
530    /// [`Stopped`]: Self::Stopped
531    pub fn is_stopped(&self) -> bool {
532        matches!(self, Self::Stopped)
533    }
534}
535
536/// Represents an audio playback request.
537#[derive(Debug, Clone, Serialize, Deserialize)]
538#[non_exhaustive]
539pub struct AudioPlayRequest {
540    /// The audio output stream.
541    ///
542    /// If another audio is already playing this request is appended to the end.
543    pub output: AudioOutputId,
544
545    /// The audio.
546    pub mix: AudioMix,
547}
548impl AudioPlayRequest {
549    /// New.
550    pub fn new(output: AudioOutputId, mix: AudioMix) -> Self {
551        Self { output, mix }
552    }
553}
554
555/// Represents an audio source.
556#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
557#[non_exhaustive]
558pub struct AudioMix {
559    /// Silent start padding.
560    pub delay: Duration,
561    /// Total duration.
562    ///
563    /// If not set audio plays until the last layer. If set audio plays for the duration, if layers end before the duration
564    /// plays silent, if layers exceed the duration the end is clipped.
565    pub total_duration: Option<Duration>,
566
567    /// Components of this mix.
568    ///
569    /// Each layer applies to the previous.
570    pub layers: Vec<AudioMixLayer>,
571}
572impl AudioMix {
573    /// New empty.
574    pub fn new() -> Self {
575        Self {
576            delay: Duration::ZERO,
577            total_duration: None,
578            layers: vec![],
579        }
580    }
581}
582impl Default for AudioMix {
583    fn default() -> Self {
584        Self::new()
585    }
586}
587
/// Represents an audio source component.
///
/// Layers are the items of [`AudioMix::layers`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum AudioMixLayer {
    /// Play the cached audio.
    ///
    /// The audio samples are adapted to the output format and added to the under layers result.
    Audio {
        /// The audio.
        audio: AudioId,
        /// Clip the start of the audio.
        ///
        /// Set to [`Duration::ZERO`] to play from the start.
        skip: Duration,
        /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
        ///
        /// Set to [`Duration::MAX`] to play to the end.
        take: Duration,
    },
    /// Play the mix.
    ///
    /// This mix is sampled as an audio (computed), its effect layers do not affect the parent mix.
    AudioMix {
        /// The inner mix.
        mix: AudioMix,
        /// Clip the start of the audio.
        ///
        /// Set to [`Duration::ZERO`] to play from the start.
        skip: Duration,
        /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
        ///
        /// Set to [`Duration::MAX`] to play to the end.
        take: Duration,
    },

    /// Linear volume transition.
    ///
    /// When the playback is in range the volume is multiplied by the linear interpolation between `start_volume` and `end_volume`. The volume snaps
    /// back to the output stream volume after the end, unless another volume control layer is in effect.
    VolumeLinear {
        /// Start time.
        start: Duration,
        /// Transition duration.
        ///
        /// The effect ends at `start + duration` time.
        duration: Duration,

        /// Volume at the start.
        start_volume: Factor,
        /// Volume at the end.
        end_volume: Factor,
    },

    /// Generate a sine wave sound.
    SineWave {
        /// Sine frequency.
        frequency: f32,
        /// Duration of the sample.
        duration: Duration,
    },
}
649
bitflags! {
    /// Defines what tracks are decoded from multi track containers.
    #[derive(Copy, Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)]
    pub struct AudioTracksMode: u8 {
        /// Decodes all tracks.
        const TRACKS = 0b0001;
        /// Decodes only the first track, or the track explicitly marked as primary/default by the container format.
        ///
        /// Note that this is 0, empty, so `contains(PRIMARY)` is `true` for any value.
        const PRIMARY = 0;
    }
}
661}