zng_view_api/
audio.rs

1//! Audio device types.
2
3use std::{num::NonZeroU16, time::Duration};
4
5use bitflags::bitflags;
6use serde::{Deserialize, Serialize};
7use zng_task::channel::IpcBytesCast;
8use zng_txt::Txt;
9use zng_unit::Factor;
10
11crate::declare_id! {
12    /// Audio device ID.
13    ///
14    /// In the View Process this is mapped to a system id.
15    ///
16    /// In the App Process this is mapped to an unique id, but does not survived View crashes.
17    ///
18    /// The View Process defines the ID.
19    pub struct AudioDeviceId(_);
20
21    /// Id of a decoded or on demand decoding audio track in the cache.
22    ///
23    /// The View Process defines the ID.
24    pub struct AudioId(_);
25
26    /// Audio playback stream ID.
27    ///
28    /// In the View Process this is mapped to a system id.
29    ///
30    /// In the App Process this is an unique id that survives View crashes.
31    ///
32    /// The App Process defines the ID.
33    pub struct AudioOutputId(_);
34
35    /// Audio playback request ID.
36    ///
37    /// The View Process defines the ID.
38    pub struct AudioPlayId(_);
39
40    /// Id of an audio encode task.
41    ///
42    /// The View Process defines the ID.
43    pub struct AudioEncodeId(_);
44}
45
46/// Info about an input or output device.
47#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
48#[non_exhaustive]
49pub struct AudioDeviceInfo {
50    /// Device display name.
51    pub name: Txt,
52    /// Device input/output capabilities.
53    pub capabilities: AudioDeviceCapability,
54    /// Input stream modes this device can produce.
55    pub input_modes: Vec<AudioStreamMode>,
56    /// Output stream modes this device can consume.
57    pub output_modes: Vec<AudioStreamMode>,
58}
59
60bitflags! {
61    /// Represents audio device input/output capabilities.
62    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
63    pub struct AudioDeviceCapability: u8 {
64        /// Device can generate audio streams.
65        const INPUT = 0b01;
66        /// Device can consume audio streams.
67        const OUTPUT = 0b11;
68    }
69}
70
71/// Represents steam capability of an audio device.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
73#[non_exhaustive]
74pub struct AudioStreamMode {
75    /// Number of audio channels.
76    pub channels: NonZeroU16,
77    /// Minimum and maximum sample rate.
78    pub sample_rate: SampleRate,
79    /// Minimum and maximum supported buffer size.
80    pub buffer_size: BufferSize,
81}
82
83/// Represents the minimum and maximum sample rate per audio channel.
84///
85/// Values are in samples processed per second.
86#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
87pub struct SampleRate {
88    /// Minimum, inclusive.
89    pub min: u32,
90    /// Maximum, inclusive.
91    pub max: u32,
92}
93
94/// Represents the minimum and maximum supported buffer size for the device.
95#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
96#[non_exhaustive]
97pub enum BufferSize {
98    /// Range in frames per second.
99    Range {
100        /// Minimum, inclusive.
101        min: u32,
102        /// Maximum, inclusive.
103        max: u32,
104    },
105    /// Platform cannot describe buffer size for this device.
106    Unknown,
107}
108
109/// Represent an audio load/decode request.
110#[derive(Debug, Clone, Serialize, Deserialize)]
111#[non_exhaustive]
112pub struct AudioRequest<D> {
113    /// Audio data format.
114    pub format: AudioDataFormat,
115
116    /// Audio data.
117    pub data: D,
118
119    /// Maximum allowed decoded size in bytes.
120    ///
121    /// View-process will avoid decoding and return an error if the track would exceed this limit.
122    pub max_decoded_len: u64,
123
124    /// Defines what tracks are decoded from multi image containers.
125    pub tracks: AudioTracksMode,
126
127    /// Audio is a track (or subtree) of this other audio.
128    ///
129    /// This value is now used by the view-process, it is just returned with the metadata. This is useful when
130    /// an already decoded image is requested after a respawn to maintain the original container structure.
131    pub parent: Option<AudioTrackMetadata>,
132}
133impl<D> AudioRequest<D> {
134    /// New.
135    pub fn new(format: AudioDataFormat, data: D, max_decoded_len: u64) -> Self {
136        Self {
137            format,
138            data,
139            max_decoded_len,
140            tracks: AudioTracksMode::PRIMARY,
141            parent: None,
142        }
143    }
144}
145
146/// Format of the audio bytes.
147#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
148#[non_exhaustive]
149pub enum AudioDataFormat {
150    /// Data is already decoded stream of interleaved `f32` samples.
151    InterleavedF32 {
152        /// Number of channels interleaved in the track.
153        channel_count: u16,
154        /// Samples per second.
155        ///
156        /// A sample is a single sequence of `channel_count`.
157        sample_rate: u32,
158        /// Total duration of the track, if it is known.
159        total_duration: Option<Duration>,
160    },
161
162    /// The audio is encoded.
163    ///
164    /// This file extension maybe identifies the format. Fallback to `Unknown` handling if the file extension
165    /// is unknown or the file header does not match.
166    FileExtension(Txt),
167
168    /// The audio is encoded.
169    ///
170    /// This MIME type maybe identifies the format. Fallback to `Unknown` handling if the file extension
171    /// is unknown or the file header does not match.
172    MimeType(Txt),
173
174    /// The image is encoded.
175    ///
176    /// A decoder will be selected using the "magic number" at the start of the bytes buffer.
177    Unknown,
178}
179impl From<Txt> for AudioDataFormat {
180    fn from(ext_or_mime: Txt) -> Self {
181        if ext_or_mime.contains('/') {
182            AudioDataFormat::MimeType(ext_or_mime)
183        } else {
184            AudioDataFormat::FileExtension(ext_or_mime)
185        }
186    }
187}
188impl From<&str> for AudioDataFormat {
189    fn from(ext_or_mime: &str) -> Self {
190        Txt::from_str(ext_or_mime).into()
191    }
192}
193
194/// Represents an audio codec capability.
195///
196/// This type will be used in the next breaking release of the view API.
197#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
198#[non_exhaustive]
199pub struct AudioFormat {
200    /// Display name of the format.
201    pub display_name: Txt,
202
203    /// Media types (MIME) associated with the format.
204    ///
205    /// Lowercase, without `"audio/"` prefix, comma separated if there is more than one.
206    pub media_type_suffixes: Txt,
207
208    /// Common file extensions associated with the format.
209    ///
210    /// Lowercase, without dot, comma separated if there is more than one.
211    pub file_extensions: Txt,
212
213    /// Capabilities of this format.
214    pub capabilities: AudioFormatCapability,
215}
216impl AudioFormat {
217    /// From static str.
218    ///
219    /// # Panics
220    ///
221    /// Panics if `media_type_suffixes` not ASCII.
222    pub const fn from_static(
223        display_name: &'static str,
224        media_type_suffixes: &'static str,
225        file_extensions: &'static str,
226        capabilities: AudioFormatCapability,
227    ) -> Self {
228        assert!(media_type_suffixes.is_ascii());
229        Self {
230            display_name: Txt::from_static(display_name),
231            media_type_suffixes: Txt::from_static(media_type_suffixes),
232            file_extensions: Txt::from_static(file_extensions),
233            capabilities,
234        }
235    }
236
237    /// Iterate over media type suffixes.
238    pub fn media_type_suffixes_iter(&self) -> impl Iterator<Item = &str> {
239        self.media_type_suffixes.split(',').map(|e| e.trim())
240    }
241
242    /// Iterate over full media types, with `"image/"` prefix.
243    pub fn media_types(&self) -> impl Iterator<Item = Txt> {
244        self.media_type_suffixes_iter().map(Txt::from_str)
245    }
246
247    /// Iterate over extensions.
248    pub fn file_extensions_iter(&self) -> impl Iterator<Item = &str> {
249        self.file_extensions.split(',').map(|e| e.trim())
250    }
251
252    /// Checks if `f` matches any of the mime types or any of the file extensions.
253    ///
254    /// File extensions comparison ignores dot and ASCII case.
255    pub fn matches(&self, f: &str) -> bool {
256        let f = f.strip_prefix('.').unwrap_or(f);
257        let f = f.strip_prefix("audio/").unwrap_or(f);
258        self.media_type_suffixes_iter().any(|e| e.eq_ignore_ascii_case(f)) || self.file_extensions_iter().any(|e| e.eq_ignore_ascii_case(f))
259    }
260}
261
262bitflags! {
263    /// Capabilities of an [`AudioFormat`] implementation.
264    ///
265    /// Note that `DECODE` capability is omitted because the view-process can always decode formats.
266    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
267    pub struct AudioFormatCapability: u8 {
268        /// View-process can encode audio in this format.
269        const ENCODE = 0b_0000_0001;
270    }
271}
272
273/// Represent a image encode request.
274#[derive(Debug, Clone, Serialize, Deserialize)]
275#[non_exhaustive]
276pub struct AudioEncodeRequest {
277    /// Audio to encode.
278    pub id: AudioId,
279
280    /// Format query, view-process uses [`AudioFormat::matches`] to find the format.
281    pub format: Txt,
282
283    /// The audio to encode.
284    pub mix: AudioMix,
285}
286impl AudioEncodeRequest {
287    /// New.
288    pub fn new(id: AudioId, format: Txt, mix: AudioMix) -> Self {
289        Self { id, format, mix }
290    }
291}
292
293/// Represents decoded header metadata about an audio track.
294#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
295#[non_exhaustive]
296pub struct AudioMetadata {
297    /// Audio ID.
298    pub id: AudioId,
299
300    /// Number of channels interleaved in the track.
301    pub channel_count: u16,
302    /// Samples per second.
303    ///
304    /// A sample is a single sequence of `channel_count`.
305    pub sample_rate: u32,
306    /// Total duration of the track, if it is known.
307    pub total_duration: Option<Duration>,
308
309    /// Track is an entry (or subtree) of this other track.
310    pub parent: Option<AudioTrackMetadata>,
311}
312impl AudioMetadata {
313    /// New.
314    pub fn new(id: AudioId, channel_count: u16, sample_rate: u32) -> Self {
315        Self {
316            id,
317            channel_count,
318            sample_rate,
319            total_duration: None,
320            parent: None,
321        }
322    }
323}
324/// Invalid initial value.
325impl Default for AudioMetadata {
326    fn default() -> Self {
327        Self {
328            id: AudioId::INVALID,
329            channel_count: Default::default(),
330            sample_rate: Default::default(),
331            total_duration: Default::default(),
332            parent: Default::default(),
333        }
334    }
335}
336
337/// Represents decoded header metadata about a track position in the container represented by another audio.
338#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
339#[non_exhaustive]
340pub struct AudioTrackMetadata {
341    /// Image this one belongs too.
342    ///
343    /// The view-process always sends the parent image metadata first, so this id should be known by the app-process.
344    pub parent: AudioId,
345    /// Sort index of the track in the list of tracks.
346    pub index: usize,
347}
348impl AudioTrackMetadata {
349    /// New.
350    pub fn new(parent: AudioId, index: usize) -> Self {
351        Self { parent, index }
352    }
353}
354
355/// Represents a partial or fully decoded audio.
356///
357/// See [`Event::AudioDecoded`] for more details.
358///
359/// [`Event::AudioDecoded`]: crate::Event::AudioDecoded
360#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
361#[non_exhaustive]
362pub struct AudioDecoded {
363    /// The audio track ID.
364    ///
365    /// An [`AudioMetadata`] for this ID was already notified before this event.
366    pub id: AudioId,
367
368    /// Offset of the `chunk` on the track.
369    ///
370    /// This is a count in samples before the first in this chunk, a sample is a sequence of [`channel_count`].
371    ///
372    /// To convert offset to bytes `offset * channel_count * size_of::<f32>()`.
373    ///
374    /// [`channel_count`]: AudioMetadata::channel_count
375    pub offset: usize,
376
377    /// Interleaved `f32` samples.
378    pub chunk: IpcBytesCast<f32>,
379
380    /// If the `chunk` is actually the full decoded audio.
381    ///
382    /// When this is `true` no more decode events for the `id` are send, (re)playing the audio
383    /// will read directly from the cache.
384    ///
385    /// When this is `false` the `chunk` represent the last decoded chunk on demand because the audio is playing.
386    /// Depending on the request the audio may never be fully cached, always decoding again on replay.
387    pub is_full: bool,
388}
389impl AudioDecoded {
390    /// New.
391    pub fn new(id: AudioId, chunk: IpcBytesCast<f32>) -> Self {
392        Self {
393            id,
394            offset: 0,
395            chunk,
396            is_full: false,
397        }
398    }
399}
400/// Invalid initial value.
401impl Default for AudioDecoded {
402    fn default() -> Self {
403        Self {
404            id: AudioId::INVALID,
405            offset: Default::default(),
406            chunk: Default::default(),
407            is_full: Default::default(),
408        }
409    }
410}
411
412/// Represents a connection request to an audio output device.
413#[derive(Debug, Clone, Serialize, Deserialize)]
414#[non_exhaustive]
415pub struct AudioOutputRequest {
416    /// ID that will identify the new output.
417    pub id: AudioOutputId,
418
419    /// Initial config.
420    pub config: AudioOutputConfig,
421}
422impl AudioOutputRequest {
423    /// New.
424    pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
425        Self { id, config }
426    }
427}
428
429/// Represents an audio playback update request.
430#[derive(Debug, Clone, Serialize, Deserialize)]
431#[non_exhaustive]
432pub struct AudioOutputUpdateRequest {
433    /// The output stream.
434    pub id: AudioOutputId,
435    /// New config.
436    pub config: AudioOutputConfig,
437}
438impl AudioOutputUpdateRequest {
439    /// New.
440    pub fn new(id: AudioOutputId, config: AudioOutputConfig) -> Self {
441        Self { id, config }
442    }
443}
444
445/// Represents an audio output stream capabilities.
446///
447/// Any audio played on this output is automatically converted to the channel count and sample rate.
448#[derive(Debug, Clone, Serialize, Deserialize)]
449#[non_exhaustive]
450pub struct AudioOutputOpenData {
451    /// Number of channels interleaved supported by this output.
452    pub channel_count: u16,
453    /// Samples per second.
454    ///
455    /// A sample is a single sequence of `channel_count`.
456    pub sample_rate: u32,
457}
458impl AudioOutputOpenData {
459    /// New.
460    pub fn new(channel_count: u16, sample_rate: u32) -> Self {
461        Self {
462            channel_count,
463            sample_rate,
464        }
465    }
466}
467
468/// Audio playback config.
469#[derive(Debug, Clone, Serialize, Deserialize)]
470#[non_exhaustive]
471pub struct AudioOutputConfig {
472    /// Playback state.
473    pub state: AudioOutputState,
474
475    /// Volume of the sound.
476    ///
477    /// The value multiplies the samples, `1.fct()` is the *natural* volume from the source.
478    pub volume: Factor,
479
480    /// Speed of the sound.
481    ///
482    /// This is a multiplier of the playback speed and pitch.
483    ///
484    /// * `0.5.fct()` doubles the total duration and halves (lowers) the pitch.
485    /// * `2.fct()` halves the total duration and doubles (raises) the pitch.
486    pub speed: Factor,
487}
488impl AudioOutputConfig {
489    /// New.
490    pub fn new(state: AudioOutputState, volume: Factor, speed: Factor) -> Self {
491        Self { state, volume, speed }
492    }
493}
494
495/// Represents the playback state if an audio output stream.
496#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
497#[non_exhaustive]
498pub enum AudioOutputState {
499    /// Audio is sent to the device for playback as in the sequence they are cued.
500    Playing,
501    /// Audio playback is paused, cue requests are buffered.
502    Paused,
503    /// Audio playback is paused, all current cue requests are dropped.
504    Stopped,
505}
506impl AudioOutputState {
507    /// If is [`Playing`].
508    ///
509    /// [`Playing`]: Self::Playing
510    pub fn is_playing(&self) -> bool {
511        matches!(self, Self::Playing)
512    }
513
514    /// If is [`Paused`].
515    ///
516    /// [`Paused`]: Self::Paused
517    pub fn is_paused(&self) -> bool {
518        matches!(self, Self::Paused)
519    }
520
521    /// If is [`Stopped`].
522    ///
523    /// [`Stopped`]: Self::Stopped
524    pub fn is_stopped(&self) -> bool {
525        matches!(self, Self::Stopped)
526    }
527}
528
529/// Represents an audio playback request.
530#[derive(Debug, Clone, Serialize, Deserialize)]
531#[non_exhaustive]
532pub struct AudioPlayRequest {
533    /// The audio output stream.
534    ///
535    /// If another audio is already playing this request is appended to the end.
536    pub output: AudioOutputId,
537
538    /// The audio.
539    pub mix: AudioMix,
540}
541impl AudioPlayRequest {
542    /// New.
543    pub fn new(output: AudioOutputId, mix: AudioMix) -> Self {
544        Self { output, mix }
545    }
546}
547
548/// Represents an audio source.
549#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
550#[non_exhaustive]
551pub struct AudioMix {
552    /// Silent start padding.
553    pub delay: Duration,
554    /// Total duration.
555    ///
556    /// If not set audio plays until the last layer. If set audio plays for the duration, if layers end before the duration
557    /// plays silent, if layers exceed the duration the end is clipped.
558    pub total_duration: Option<Duration>,
559
560    /// Components of this mix.
561    ///
562    /// Each layer applies to the previous.
563    pub layers: Vec<AudioMixLayer>,
564}
565impl AudioMix {
566    /// New empty.
567    pub fn new() -> Self {
568        Self {
569            delay: Duration::ZERO,
570            total_duration: None,
571            layers: vec![],
572        }
573    }
574}
575impl Default for AudioMix {
576    fn default() -> Self {
577        Self::new()
578    }
579}
580
581/// Represents an audio source component.
582#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
583#[non_exhaustive]
584pub enum AudioMixLayer {
585    /// Play the cached audio.
586    ///
587    /// The audio samples are adapted to the output format and added to the under layers result.
588    Audio {
589        /// The audio.
590        audio: AudioId,
591        /// Clip the start of the audio.
592        ///
593        /// Set to [`Duration::ZERO`] to play from the start.
594        skip: Duration,
595        /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
596        ///
597        /// Set to [`Duration::MAX`] to play to the end.
598        take: Duration,
599    },
600    /// Play the mix.
601    ///
602    /// This mix is sampled as an audio (computed), its effect layers do not affect the parent mix.
603    AudioMix {
604        /// The inner mix.
605        mix: AudioMix,
606        /// Clip the start of the audio.
607        ///
608        /// Set to [`Duration::ZERO`] to play from the start.
609        skip: Duration,
610        /// Clip the end of the audio. Does not add padding, if `skip + take` is greater than the audio length stops early.
611        ///
612        /// Set to [`Duration::MAX`] to play to the end.
613        take: Duration,
614    },
615
616    /// Linear volume transition.
617    ///
618    /// When the playback is in range the volume is multiplied by the linear interpolation between `start_volume` and `end_volume`. The volume snaps
619    /// back to the output stream volume after the end, unless another volume control layer is in effect.
620    VolumeLinear {
621        /// Start time.
622        start: Duration,
623        /// Transition duration.
624        ///
625        /// The effect ends at `start + duration` time.
626        duration: Duration,
627
628        /// Volume at the start.
629        start_volume: Factor,
630        /// Volume at the end,
631        end_volume: Factor,
632    },
633
634    /// Generate a sine wave sound.
635    SineWave {
636        /// Sine frequency.
637        frequency: f32,
638        /// Duration of the sample.
639        duration: Duration,
640    },
641}
642
643bitflags! {
644    /// Defines what tracks are decoded from multi track containers.
645    #[derive(Copy, Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)]
646    pub struct AudioTracksMode: u8 {
647        /// Decodes all tracks.
648        const TRACKS = 0b0001;
649        /// Decodes only the first track, or the track explicitly marked as primary/default by the container format.
650        ///
651        /// Note that this is 0, empty.
652        const PRIMARY = 0;
653    }
654}