TappedAudio: A language and implementation independent audio library

Summary

TappedAudio is an audio library with an emphasis on the programming interface -- independent of the backend, written in highly-encapsulated C++ and using SWIG to present a unified API for C++, Python, Ruby, etc., regardless of the underlying audio platform.

Features

Until I have time for something prettier here, have a look at the Project Page or the API Documentation (and the examples in particular). There is also Developer API Documentation.

Made with /bin/cat by Trent Apted on 2005-11-08. Updated on 2007-01-19 with /usr/bin/nano.

SourceForge.net Logo

TappedAudio API

/* $Id: taptaudio.h 7 2008-04-21 03:54:50Z tapted $ $URL: https://tappedaudio.svn.sourceforge.net/svnroot/tappedaudio/tappedaudio/trunk/include/taptaudio.h $ */
#ifndef TAPTAUDIO_DOT_AITCH
#define TAPTAUDIO_DOT_AITCH

/**\file taptaudio.h
 * Tapted's Audio Subsystem manager singleton
 * \author Trent Apted <tapted@it.usyd.edu.au>
 * $Revision: 7 $
 * $Date: 2008-04-21 13:54:50 +1000 (Mon, 21 Apr 2008) $
 */

#ifndef SWIG
#include <string>
#endif

/**
 * PCM sample formats the mixer can be asked to work in.
 * Enumerator values are consecutive integers starting at 0.
 */
enum AUDIO_FORMAT {
    AF_Default      = 0, ///< Default format
    AF_Float32      = 1, ///< 32-bit floats
    AF_Int16        = 2, ///< 16-bit signed ints
    AF_Int32        = 3, ///< 32-bit signed ints
    AF_Int24        = 4, ///< Not supported
    AF_PackedInt24  = 5, ///< Not supported
    AF_Int8         = 6, ///< 8-bit signed ints
    AF_UInt8        = 7, ///< 8-bit unsigned ints (not supported)
    AF_CustomFormat = 8  ///< Some custom format (not supported)
};

/**
 * File encodings that have mappings for saving.
 *
 * A complete value is one container (header) type, optionally OR-ed with
 * one encoding and one PCM sample format. Each part occupies its own bit
 * field:
 *  - AF_MASK_FILE       (0x00ff): container type
 *  - AF_MASK_ENCODING   (0x0f00): encoding
 *  - AF_MASK_PCM_FORMAT (0xf000): PCM sample format
 *
 * Loading is more permissive: many other formats will work by using the
 * magic bytes at the start of the file (thanks to libsndfile). The values
 * here are only those mapped so far for saving.
 */
enum FILE_ENCODING {
    /* container (header) types */
    AF_ENC_WAV  = 0x0000, ///< WAV File (defaults to PCM)
    AF_ENC_RAW  = 0x0001, ///< RAW format (PCM)
    AF_ENC_OGG  = 0x0002, ///< OGG File -- not yet implemented
    AF_ENC_FLAC = 0x0003, ///< Compressed, lossless FLAC
    AF_ENC_AU   = 0x0004, ///< AU File (defaults to PCM)
    AF_ENC_AIFF = 0x0005, ///< AIFF File
    AF_ENC_CAF  = 0x0006, ///< Apple CAF File
    AF_MASK_FILE  = 0x00ff, ///< Mask selecting the container type
    AF_SHIFT_FILE = 0,      ///< Right bit-shift that isolates the container type

    /* encodings: must be OR-ed with one of the container types above */
    AF_ENC_PCM    = 0x0000, ///< PCM encoding (default)
    AF_ENC_MLAW   = 0x0100, ///< mu-law encoding
    AF_ENC_ALAW   = 0x0200, ///< A-law encoding
    AF_ENC_ADPCM  = 0x0300, ///< ADPCM -- WAV or AU only, valid qualities are 24 (default), 32 or 40 (kbps)
    AF_ENC_DWVW   = 0x0400, ///< DWVW -- AIFF or RAW only
    AF_ENC_VORBIS = 0x0500, ///< Vorbis encoding (ogg) -- not yet implemented
    AF_ENC_SPEEX  = 0x0600, ///< Speex encoding (ogg) -- not yet implemented
    AF_MASK_ENCODING  = 0x0f00, ///< Mask selecting the encoding
    AF_SHIFT_ENCODING = 8,      ///< Right bit-shift that isolates the encoding

    /* PCM sample formats: use these to force a format other than that of the audio system.
     * Note that 0x4000 is not assigned to any enumerator. */
    AF_ENC_PCM_AUDIOSYS = 0x0000, ///< Use the same audio format as the currently running AudioSystem
    AF_ENC_PCM_UNSIGNED = 0x1000, ///< Unsigned 8-bit -- WAV, AIFF and RAW only
    AF_ENC_PCM_8        = 0x2000, ///< Signed 8-bit integer -- all except WAV
    AF_ENC_PCM_16       = 0x3000, ///< Signed 16-bit integer
    AF_ENC_PCM_24       = 0x5000, ///< Signed 24-bit integer
    AF_ENC_PCM_32       = 0x6000, ///< Signed 32-bit integer
    AF_ENC_PCM_FLOAT    = 0x7000, ///< 32-bit floating point
    AF_ENC_PCM_DOUBLE   = 0x8000, ///< 64-bit floating point
    AF_MASK_PCM_FORMAT  = 0xf000, ///< Mask selecting the PCM sample format
    AF_SHIFT_PCM_FORMAT = 12,     ///< Right bit-shift that isolates the PCM sample format

    AF_MASK_FILE_ENCODING = 0xffff ///< Mask covering the whole file and encoding description
};

/**
 * Audio backends.
 *
 * Every AF_PORTAUDIO_* value equals AF_PORTAUDIO with a small index placed
 * above the two low bits, i.e. AF_PORTAUDIO | (index << 2). The numeric
 * values are part of the interface and must not change.
 * NOTE(review): the AF_PORTAUDIO_* names presumably select a specific
 * Portaudio host API; how the index is decoded is not visible in this
 * header -- confirm against the implementation.
 */
enum AUDIO_BACKEND {
    AF_NOAUDIO   = 0, ///< Use a dummy backend
    AF_PORTAUDIO = 1, ///< Use Portaudio backend (default host)
    AF_JACKAUDIO = 2, ///< Use JACK Audio Connection Toolkit backend

    AF_PORTAUDIO_OSS          = AF_PORTAUDIO | (1  << 2), ///< = 5
    AF_PORTAUDIO_ALSA         = AF_PORTAUDIO | (2  << 2), ///< = 9
    AF_PORTAUDIO_JACK         = AF_PORTAUDIO | (3  << 2), ///< = 13
    AF_PORTAUDIO_MME          = AF_PORTAUDIO | (4  << 2), ///< = 17
    AF_PORTAUDIO_ASIO         = AF_PORTAUDIO | (5  << 2), ///< = 21
    AF_PORTAUDIO_AL           = AF_PORTAUDIO | (6  << 2), ///< = 25
    AF_PORTAUDIO_BEOS         = AF_PORTAUDIO | (7  << 2), ///< = 29
    AF_PORTAUDIO_WDMKS        = AF_PORTAUDIO | (8  << 2), ///< = 33
    AF_PORTAUDIO_COREAUDIO    = AF_PORTAUDIO | (9  << 2), ///< = 37
    AF_PORTAUDIO_SOUNDMANAGER = AF_PORTAUDIO | (10 << 2), ///< = 41
    AF_PORTAUDIO_DIRECTSOUND  = AF_PORTAUDIO | (11 << 2), ///< = 45
    AF_PORTAUDIO_ALSA_BLOCK   = AF_PORTAUDIO | (12 << 2), ///< = 49
    AF_PORTAUDIO_ABSOLUTE     = AF_PORTAUDIO | (13 << 2)  ///< = 53; device IDs specified for portaudio are absolute IDs
    /* that's all I've implemented so far.. */
};

class AudioSystemImpl; // forward declaration: AudioSystem only holds a pointer to it, so the full type is not needed here
class ASSample; // forward declaration: used by pointer in AudioSystem's interface before its definition

/**
 * Recording options to pass to AudioSystem::stopRec() and AudioSystem::streamRec().
 *
 * This is a plain aggregate with no constructors, so a default-constructed
 * local RecordOpts has uninitialised members. Use RecordOptions to get the
 * defaults; the value in parentheses on each member is the default that
 * RecordOptions' constructors and set() declare for it.
 */
/* Ideally we want these to be inside AudioSystem, but SWIG doesn't support nested classes yet */
struct RecordOpts {
    bool keep_and_return; ///< (true) Retain a handle on the recording, and return it (or a new ASSample* for streams); else, freeSample() it.
    bool save;            ///< (true) If true, save the recording to disk; otherwise the remaining options are ignored
    bool save_in_thread;  ///< (true) Save to disk in a separate thread (managed by a ThreadMan)
    FILE_ENCODING enc;    ///< (AF_ENC_WAV) The file encoding to use
    float quality;        ///< (2.0) Recording quality
};

/**
 * RecordOpts with constructors, so options can be built with defaults.
 * It adds no data members; a RecordOptions can be passed wherever a
 * `const RecordOpts&` is expected.
 *
 * \note When calling the (FILE_ENCODING, ...) constructors with a second
 * argument, pass a `bool` or a `float` (e.g. `2.0f`) explicitly. A `double`
 * or `int` argument converts equally well to `bool` and to `float`, so
 * `RecordOptions(AF_ENC_WAV, 2.0)` is an ambiguous call.
 */
class RecordOptions : public RecordOpts {
public:
    /** Assign all five options at once; parameter names match the RecordOpts members. */
    void set (bool keep_and_return = true, bool save = true, bool save_in_thread = true, FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
    /** Construct from explicit values for each member; also serves as the default constructor. */
    RecordOptions (bool keep_and_return = true, bool save = true, bool save_in_thread = true, FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
    /** Construct with the encoding first. There is no \a save parameter here -- presumably it is set to true; confirm in the implementation. */
    RecordOptions (FILE_ENCODING enc, bool save_in_thread = true, bool keep_and_return = true, float quality = 2.0);
    /** Construct from an encoding and a quality. The remaining members presumably take their defaults; confirm in the implementation. */
    RecordOptions (FILE_ENCODING enc, float quality);
};

/**
 * The audio system. Only one of these may exist at a time.
 * You may delete the old copy and make a new one if you wish.
 */
class AudioSystem {

public:
    typedef ::RecordOpts RecordOpts;       ///< Alias for RecordOpts
    typedef ::RecordOptions RecordOptions; ///< Alias for RecordOptions

    /* configuration variables */
    /**
     * Number of audio tracks in mixer.
     *  - 0 will disable audio (singleton will never be created)
     *  - 1 will use a super-fast non-mixer version
     *  - >1 will use a stereo/mono N-track mixer
     * default: 32
     * \note track overhead (i.e. >2) is negligible (i.e. vs 2)
     */
    static unsigned AUDIO_TRACKS;

    /**
     * Maximum number of simultaneous recordings permitted. These
     * take up memory, so there shouldn't be a huge number.
     * default: 8
     */
    static unsigned RECORD_TRACKS;

    /**
     * The "actual" default audio format, to keep in one, global place
     */
    static AUDIO_FORMAT DEFAULT_AUDIO_FORMAT;

    /**
     * Size, in bytes, of the ringbuffers used for streaming.
     * MUST be a power of 2.
     * \note 1 second takes the order of 176,400 bytes, so you probably
     * want something like 131072, 262144, 524288, 1048576, ...
     */
    static unsigned long RINGBUFFER_BYTES;

    /**
     * Size, in bytes, of the ringbuffers used for streaming when recording.
     */
    static unsigned long RINGBUFFER_RECORDBYTES;

    /**
     * If true, never `delete` the implementation -- leak it
     * (default is false)
     */
    static bool NEVER_DELETE_IMPL;

protected:
    ///no copying - we are a Singleton
    AudioSystem(const AudioSystem&);// {}

#ifndef SWIG
    ///no assignment - we are a Singleton
    AudioSystem& operator=(const AudioSystem&);// {return *this;}
#endif

    AudioSystemImpl* impl; ///< The pointer implementation
    static AudioSystem* instance; ///< The singleton

public:
    /**
     * Create the audio system.
     *
     * \param format The format to try and mix audio in (some backends may force a particular format)
     * \param sampleRate The sample rate to run the AudioSystem in
     * \param inputChannels The number of input channels (i.e. mono/stereo microphone)
     * \param outputChannels The number of output channels (i.e. mono/stereo/surround)
     * \param deviceIDin The input device identifier (usually backend-dependent, -1 is default/best, use listDevices() for a list)
     * \param deviceIDout The output device identifier
     * \param backends A fallback list of backends to attempt to initialize.
     *                 THIS MUST BE TERMINATED BY AF_NOAUDIO (or NULL/0).
     */
    // The default is spelled 0 (not NULL) like every other pointer default
    // in this header, so the declaration does not depend on a header that
    // defines the NULL macro being included first.
    AudioSystem(AUDIO_FORMAT format = AF_Default,
                double sampleRate = 0.0,
                int inputChannels = 2,
                int outputChannels = 2,
                int deviceIDin = -1,
                int deviceIDout = -1,
                const AUDIO_BACKEND backends[] = 0
               );

    /**
     * Get the singleton instance. Returns NULL if it has
     * not been created, or it has been destroyed.
     */
    static AudioSystem* get();// {return instance;} //no good for dependent DLLs

    /**
     * Returns true if the AudioSystem is running.
     */
    static bool isRunning();// {return instance;} //no good for dependent DLLs

    /**
     * Set the debugging level for the audio system. 0 for none,
     * higher means more messages
     */
    static void setDebugLevel(unsigned level);

    /**
     * Set the FILE* to which debugging output is sent
     *
     * \param FILESTAR a FILE*, passed as void*
     */
    static void setDebugFile(void* FILESTAR);

    /**
     * Lists the device IDs to stderr
     */
    static void listDevices();

    /**
     * Destructor destroys the singleton, allowing another to be made.
     */
    ~AudioSystem();

    //@{
    /**
     * Load a sample that will be managed by the AudioSystem.
     * Client code will just use the return value as a pointer
     * to pass back into the audio system. Most of this can occur
     * in a background thread .. but maybe not two at once.
     *
     * \note If \a name is not registered, it must refer to a file AND
     * the file must be in a format convertible to the audio system.
     *
     * \param name A filename, or the name of a sample "known" to the AudioSystem
     * \param stream If true, stream the sample from disk (breaks mixing multiple copies of this sample)
     * \return a loaded sample, that can be passed back to the AudioSystem
     */
    ASSample* loadSample(const char* name, bool stream = false);

#ifndef SWIG
    ASSample* loadSample(const std::string &name, bool stream = false);

#endif
    //@}

    //@{
    /**
     * Give a name mapping for a sample NOT in the registry
     *
     * \param name The name to give the sample
     * \param sam The sample to map
     * \return true if successful
     */
    bool map(const char* name, ASSample *sam);

    bool map(ASSample *sam);
#ifndef SWIG
    bool map(const std::string &name, ASSample *sam);
#endif
    //@}

    /**
     * Free all resources used by a sample, ONLY IF THE REFERENCE COUNT IS ZERO.
     * Will also remove itself from the sample registry. Only ever use this to
     * free a sample -- never delete() your reference.
     *
     * \param samp the sample to free
     * \return true if the reference count was zero and the sample was freed (i.e. success)
     */
    bool freeSample(ASSample* samp);

    //@{
    /**
     * Load a raw sample from memory. Must match the format of the loaded
     * AudioSystem.
     *
     * \param name the name to give the sample
     * \param data pointer to the raw sample data
     * \param size size of \a data (presumably in bytes -- confirm in the implementation)
     */
    ASSample* loadRawSample(const char* name, void* data, unsigned long size);

#ifndef SWIG
    ASSample* loadRawSample(const std::string &name, void* data, unsigned long size);

#endif
    //@}

    //@{
    /**
     * Mix a loaded sample into the output audio stream.
     * \note if samp is NULL, we return false
     * \note if DEBUG_LEVEL > WARNING (2, i.e. 3 or more), we check to see if \a samp
     *        is registered before playing (otherwise we assume it is valid)
     * \note pass the second argument as a real `bool` (record_after) or a real
     *        `float` (vol, e.g. `0.5f`). An `int` or `double` argument converts
     *        equally well to both, so such a call is ambiguous.
     *
     * \param samp the sample to play
     * \param record_after if true, we will start recording when the sample is finished
     * \param record_size the size of the record buffer in seconds
     * \param vol the volume at which to mix the (output) sample
     * \param trackno if non-null, this will be set to the track number assigned to the sample
     * \returns true if there is a free track in which to play the sample
     */
    bool mixSample(ASSample *samp, bool record_after, double record_size = 30.0, float vol = 1.0f, unsigned* trackno = 0);

    bool mixSample(ASSample *samp, float vol = 1.0f, unsigned* trackno = 0);

    //@}

    /**
     * Loop a sample. Stop it with stopSample.
     *
     * \note it makes no sense to record after one of these
     * \param samp the sample to play
     * \param vol the volume at which to mix the sample
     * \param trackno if non-null, this will be set to the track number assigned to the sample
     * \returns true if our mixer supports looping and
     *          there is a free track in which to play the sample
     */
    bool loopSample(ASSample *samp, float vol = 1.0f, unsigned* trackno = 0);

    /**
     * Stop playing instances of a sample.
     *
     * \param samp the sample to stop playing.
     * \param trackno the track (holding a playing \a samp) to stop; -1 for all
     * \returns the number of samples stopped
     */
    unsigned stopSample(ASSample *samp, int trackno = -1);

    /**
     * Pause playing instances of a sample.
     *
     * \param samp the sample to pause
     * \param trackno the track (holding a playing \a samp) to pause; -1 for all
     * \param setpause true to pause, false to unpause
     *
     * \returns the number of samples paused
     */
    unsigned pauseSample(ASSample *samp, int trackno = -1, bool setpause = true);

    /**
     * Unpause paused instances of a sample.
     *
     * \param samp the sample to unpause
     * \param trackno the track (holding a paused \a samp) to unpause; -1 for all
     */
    unsigned unpauseSample(ASSample *samp, int trackno = -1);

    /**
     * Adjust the volume of all playing instances of a sample
     *
     * \param samp the sample to adjust
     * \param vol the volume to set
     * \param trackno the track (holding a playing \a samp) to adjust; -1 for all
     * \returns the number of samples adjusted
     */
    unsigned setVolume(ASSample *samp, float vol = 1.0f, int trackno = -1);

    //@{
    /**
     * Start buffering data from the audio input (microphone) into
     * the (single) recording buffer.
     *
     * \note a literal `0` as the first argument is ambiguous (it converts
     *       equally well to `double` and to a null `unsigned*`); write `0.0`
     *       or pass a typed pointer.
     *
     * \param secondsMax the maximum amount of time we will record for (we will trim the buffer if we record less)
     * \param rtrackno if non-null set to contain the tracknumber we mix into
     * \param recref if non-null a reference to the Recording sample is stored
     * \return true if we were able to start recording (false if we are already recording)
     */
    bool startRec(double secondsMax = 30.0, unsigned *rtrackno = 0, ASSample **recref = 0);
    bool startRec(unsigned *rtrackno, ASSample **recref = 0);
    //@}

    static const RecordOpts RO_DEFAULT;  ///< Default Recording options
    static const RecordOpts RO_DISCARD;  ///< Just stop recording and free resources -- don't save
    static const RecordOpts RO_SAVEONLY; ///< Save to disk, but free resources (return NULL)

    //@{
    /**
     * Start streaming a recording, directly to a file. Using this doesn't require
     * you to specify a maximum length. Also, it doesn't hog RAM.
     *
     * With only a file name, the RecordOpts overload is used (with RO_DEFAULT),
     * for both `const char*` and `std::string` names.
     *
     * \param file The File (on disk) to stream the recording to
     * \param enc  file encoding to use
     * \param quality encoding quality, where relevant
     * \param rtrackno The track number the recording is placed in
     * \param recref if non-null a reference to the Recording is placed in here
     * \param opts the recording options to use for the streaming
     * \return true if the streaming could start
     */
    bool streamRec(const char *file, const RecordOpts &opts = RO_DEFAULT, unsigned *rtrackno = 0, ASSample **recref = 0);

#ifndef SWIG
    bool streamRec(const char *file, FILE_ENCODING enc, float quality = 2.0, unsigned *rtrackno = 0, ASSample **recref = 0);
    bool streamRec(const char *file, FILE_ENCODING enc, unsigned *rtrackno, ASSample **recref = 0);
    bool streamRec(const std::string &file, const RecordOpts &opts = RO_DEFAULT, unsigned *rtrackno = 0, ASSample **recref = 0);
    // `enc` deliberately has no default (matching the const char* overload):
    // with one, streamRec(std::string) would be ambiguous with the
    // RecordOpts overload directly above.
    bool streamRec(const std::string &file, FILE_ENCODING enc, float quality = 2.0, unsigned *rtrackno = 0, ASSample **recref = 0);
    bool streamRec(const std::string &file, FILE_ENCODING enc, unsigned *rtrackno, ASSample **recref = 0);
#endif
    //@}

    /**
     * Pause the recording
     *
     * \return true if there is a current recording that was not already paused (and is now paused)
     */
    bool pauseRec(int rtrackno = -1, ASSample *rec = 0);

    /**
     * Resume the recording
     *
     * \return true if there is currently a paused recording that has now resumed
     */
    bool resumeRec(int rtrackno = -1, ASSample *rec = 0);

    /**
     * Preview the current recording (i.e. mix it into the output stream). It doesn't need to be paused.
     *
     * \param vol the volume at which to mix the recording
     * \param trackno if non-null, set to the track number where the recording will be previewed
     * \param rtrackno the track number of the recording to preview (or, \a rec or, the first one found)
     * \param rec the recording handle to preview (or, do a search)
     * \return true if there is a current recording
     */
    bool previewRec(float vol = 1.0, unsigned* trackno = 0, int rtrackno = -1, ASSample *rec = 0);

    /**
     * Returns true if we are currently recording.
     */
    bool isRecording();

    /**
     * Stop *ALL* audio operations "ASAP"
     */
    void stop();

    /**
     * WAIT for *ALL* audio operations to stop
     */
    void waitstop();

    //@{

    /**
     * Stop buffering and create a sample from the recorded audio.
     *
     * Calls that give only a name (optionally followed by \a rec and
     * \a trackno) use the RecordOpts overloads with RO_DEFAULT, for both
     * `const char*` and `std::string` names. The overloads taking
     * \a save / \a save_in_thread / \a enc / \a quality are selected only
     * when \a save is passed explicitly.
     *
     * \param name if non-null use \a name to identify the sample in the registry
     *             if null, a name will be generated based on the current time
     * \param rec if non-null search for a particular Recording to stop
     * \param trackno if >= 0 stop the recording in this track number
     * \param save if true, the sample will be saved to disk, using \a name as the filename
     * \param save_in_thread if true, we will start a thread to save the data
     * \param enc  file encoding to use (defaults to WAV)
     * \param quality encoding quality, where relevant
     * \param opts The options to use for this recording
     */
    ASSample* stopRec(const char* name = "", ASSample *rec = 0, int trackno = -1,
                      const RecordOpts &opts = RO_DEFAULT);
#ifndef SWIG
    ASSample* stopRec(const char* name, ASSample *rec, int trackno,
                      bool save, bool save_in_thread = false,
                      FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
    ASSample* stopRec(const char* name, bool save, bool save_in_thread,
                      FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
    // `rec`, `trackno` and `save` deliberately have no defaults (matching the
    // const char* overload above): with defaults, stopRec(std::string) with
    // one to three arguments would be ambiguous with the RecordOpts overload
    // at the end of this group.
    ASSample* stopRec(const std::string &name, ASSample *rec, int trackno,
                      bool save, bool save_in_thread = false,
                      FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
    ASSample* stopRec(const std::string &name, bool save, bool save_in_thread = false,
                      FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
    ASSample* stopRec(const std::string &name, ASSample *rec = 0, int trackno = -1,
                      const RecordOpts &opts = RO_DEFAULT);
#endif
    //@}

    /** Get the current VU level of the input buffer between 0 and 1.0 */
    float inputVU() const;

    /** return true if we are playing \a samp */
    bool isPlaying(ASSample *samp, int trackno = -1) const;

    /** Return true if the audio system is currently able to play audio (i.e. make sound) */
    bool canPlay() const;

    /** Return true if the audio system is currently able to record sound */
    bool canRecord() const;
};

/**
 * This is an AudioSystem Sample: the abstract interface to a piece of
 * audio handled by the AudioSystem. Client code receives pointers to
 * these from AudioSystem and passes them back in; instances are freed
 * with AudioSystem::freeSample(), never with `delete`.
 */
class ASSample {
public:
    friend class AudioSystemImpl; ///< Allow AudioSystemImpl to delete instances
    friend class PCMSample;
#ifndef SWIG_MUST_PEEK
protected:
#endif
    /** Virtual Destructor -- does nothing, but is protected (unless
     * SWIG_MUST_PEEK is defined) -- samples must be deleted by the
     * audio system; NOT YOU! */
    virtual ~ASSample() {}
public:
    /** Number of non-registry references to this sample */
    unsigned refs;
    /** Constructor (refs start at 0) */
    ASSample() : refs(0) {}

    /** Return the number of channels in this sample */
    virtual unsigned getChannels() const = 0;

    /** Return the format of this sample */
    virtual AUDIO_FORMAT getFormat() const = 0;

    /** Return the number of bytes in this sample */
    virtual unsigned long numBytes() const = 0;

    /** Return the number of frames in this sample */
    virtual unsigned long numFrames() const = 0;

    /** Get a pointer to the first byte of this sample */
    virtual const void* getBytes() const = 0;

    /** Return true if, for this sample to be valid, it must be in the AudioSystem registry */
    virtual bool must_register() const = 0;

    /** Get a pointer to an offset into this sample. If \a available is
     * set to less than you want, call mixed() then this again and there may be more.
     * If there is NOT (available == 0) then the end is reached. Then, calling
     * this function _again_ with offset == 0 MAY rewind the sample for looping;
     * but if it returns NULL, then looping is not supported (e.g. streamed
     * samples with a full ringbuffer).
     * (The original comment named getBytes() for the rewind call, but getBytes()
     * takes no offset -- presumably getOffBytes() was meant; confirm.)
     *
     * \param offset the offset into the sample
     * \param available set to the number of bytes (<= numBytes()) that can be read past the return
     */
    virtual const void* getOffBytes(unsigned long offset, unsigned long &available) const = 0;

    /** Return the sample rate of this sample */
    virtual double getSamRate() const = 0;

    /** Resample to \a newrate */
    virtual bool resample(double newrate, bool highquality = false) = 0;

    /** Tell this sample that \a frames of its data were mixed by \a handle.
     * This function may be called at the interrupt level so cannot allocate
     * memory.
     */
    virtual void mixed(unsigned long frames,
                       void* handle = 0,
                       int channelOffset = -1,
                       const void *data = 0);

    /**
     * Return the VU for the most recently played frame, with backoff
     */
    virtual float getVU();

    /**
     * Return the duration in milliseconds
     */
    virtual long msDuration() const;

    /**
     * Return the duration in seconds
     */
    virtual double getDuration() const;

    /**
     * Split this sample at the specified location, in milliseconds.
     *
     * \param where the split position, in milliseconds
     * \param start presumably receives the portion before \a where -- confirm in the implementation
     * \param end presumably receives the portion from \a where onwards -- confirm in the implementation
     * \return true if successful
     */

    virtual bool split(unsigned long where, ASSample **start, ASSample **end) const;

    /**
     * Perform a "destructive" split -- \a this sample is truncated at \a where
     * and we return a new sample consisting of the truncated portion.
     */
    virtual ASSample* destructive_split(unsigned long where);

    /**
     * Remove any stretches of silences from this sample. A silence is defined
     * as a window of \a window frames (pun!) worth of audio that doesn't
     * go above \a threshold times whatever the "maximum" is for the particular
     * data type used by this sample.
     */
    virtual long remove_silence(float threshold = 0.02, unsigned window = 1000);

    //@{
    /**
     * Save this sample to \a path
     *
     * \note only the std::string overload is virtual; the const char*
     *       overload is a plain member function.
     *
     * \param path the file path on disk to save the file to
     * \param enc the file encoding (container | format) to use
     * \param quality the quality to use, if relevant
     * \return true if successful
     */
    bool save(const char* path, FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);

#ifndef SWIG
    virtual bool save(const std::string &path, FILE_ENCODING enc = AF_ENC_WAV, float quality = 2.0);
#endif
    //@}

};

/** \example record_basic.cpp
 * Basic Recording
 */

/** \example record.cpp
 * Extended Recording
 */

/** \example volumes.cpp
 * Playing with given volumes
 */

/**
 * \example mixing.cpp
 * Mixing multiple tracks
 */

/** \example loopmix.cpp
 * Mixing and looping
 */

/** \example pytests.py.in
 * Python bindings
 */

/** \example rbtests.rb.in
 * Ruby bindings
 */

#endif

SourceForge.net Logo