/* AAC Decoder Transform * * Copyright 2022 RĂ©mi Bernon for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ #include "gst_private.h" #include "mfapi.h" #include "mferror.h" #include "mfobjects.h" #include "mftransform.h" #include "wmcodecdsp.h" #include "wine/debug.h" WINE_DEFAULT_DEBUG_CHANNEL(mfplat); WINE_DECLARE_DEBUG_CHANNEL(winediag); static struct { const GUID *const guid; UINT32 payload_type; } aac_decoder_input_types[] = { {&MFAudioFormat_AAC, 0}, {&MFAudioFormat_RAW_AAC, -1}, {&MFAudioFormat_AAC, 1}, {&MFAudioFormat_AAC, 3}, {&MFAudioFormat_ADTS, -1}, }; static const GUID *const aac_decoder_output_types[] = { &MFAudioFormat_PCM, &MFAudioFormat_Float, }; struct aac_decoder { IMFTransform IMFTransform_iface; LONG refcount; IMFMediaType *input_type; IMFMediaType *output_type; struct wg_transform *wg_transform; struct wg_sample_queue *wg_sample_queue; }; static struct aac_decoder *impl_from_IMFTransform(IMFTransform *iface) { return CONTAINING_RECORD(iface, struct aac_decoder, IMFTransform_iface); } static HRESULT try_create_wg_transform(struct aac_decoder *decoder) { struct wg_format input_format, output_format; if (decoder->wg_transform) wg_transform_destroy(decoder->wg_transform); decoder->wg_transform = NULL; mf_media_type_to_wg_format(decoder->input_type, &input_format); if (input_format.major_type == WG_MAJOR_TYPE_UNKNOWN) return MF_E_INVALIDMEDIATYPE; mf_media_type_to_wg_format(decoder->output_type, &output_format); if (output_format.major_type == WG_MAJOR_TYPE_UNKNOWN) return MF_E_INVALIDMEDIATYPE; if (!(decoder->wg_transform = wg_transform_create(&input_format, &output_format))) return E_FAIL; return S_OK; } static HRESULT WINAPI transform_QueryInterface(IMFTransform *iface, REFIID iid, void **out) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out); if (IsEqualGUID(iid, &IID_IUnknown) || IsEqualGUID(iid, &IID_IMFTransform)) *out = &decoder->IMFTransform_iface; else { *out = NULL; WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); return E_NOINTERFACE; } IUnknown_AddRef((IUnknown *)*out); return S_OK; } static ULONG WINAPI transform_AddRef(IMFTransform *iface) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); ULONG refcount = InterlockedIncrement(&decoder->refcount); TRACE("iface %p increasing refcount to %lu.\n", decoder, refcount); return refcount; } static ULONG WINAPI transform_Release(IMFTransform *iface) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); ULONG refcount = InterlockedDecrement(&decoder->refcount); TRACE("iface %p decreasing refcount to %lu.\n", decoder, refcount); if (!refcount) { if (decoder->wg_transform) wg_transform_destroy(decoder->wg_transform); if (decoder->input_type) IMFMediaType_Release(decoder->input_type); if (decoder->output_type) IMFMediaType_Release(decoder->output_type); wg_sample_queue_destroy(decoder->wg_sample_queue); free(decoder); } return refcount; } static HRESULT WINAPI transform_GetStreamLimits(IMFTransform *iface, DWORD *input_minimum, DWORD *input_maximum, DWORD *output_minimum, DWORD *output_maximum) { TRACE("iface %p, input_minimum %p, input_maximum %p, output_minimum %p, output_maximum %p.\n", iface, input_minimum, input_maximum, output_minimum, output_maximum); *input_minimum = *input_maximum = *output_minimum = *output_maximum = 1; return S_OK; } static HRESULT WINAPI transform_GetStreamCount(IMFTransform *iface, DWORD *inputs, DWORD *outputs) { TRACE("iface %p, inputs %p, outputs %p.\n", iface, inputs, outputs); *inputs = *outputs = 1; return S_OK; } static HRESULT WINAPI transform_GetStreamIDs(IMFTransform *iface, DWORD input_size, DWORD *inputs, DWORD output_size, DWORD *outputs) { TRACE("iface %p, input_size %lu, inputs %p, output_size %lu, outputs %p.\n", iface, input_size, inputs, output_size, outputs); return E_NOTIMPL; } static HRESULT WINAPI transform_GetInputStreamInfo(IMFTransform *iface, DWORD id, MFT_INPUT_STREAM_INFO *info) { TRACE("iface %p, id %#lx, info %p.\n", iface, id, info); if (id) return MF_E_INVALIDSTREAMNUMBER; memset(info, 0, sizeof(*info)); info->dwFlags = MFT_INPUT_STREAM_WHOLE_SAMPLES | MFT_INPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER | MFT_INPUT_STREAM_FIXED_SAMPLE_SIZE | MFT_INPUT_STREAM_HOLDS_BUFFERS; return S_OK; } static HRESULT WINAPI transform_GetOutputStreamInfo(IMFTransform *iface, DWORD id, MFT_OUTPUT_STREAM_INFO *info) { TRACE("iface %p, id %#lx, info %p.\n", iface, id, info); if (id) return MF_E_INVALIDSTREAMNUMBER; memset(info, 0, sizeof(*info)); info->dwFlags = MFT_INPUT_STREAM_WHOLE_SAMPLES; info->cbSize = 0xc000; return S_OK; } static HRESULT WINAPI transform_GetAttributes(IMFTransform *iface, IMFAttributes **attributes) { TRACE("iface %p, attributes %p.\n", iface, attributes); return E_NOTIMPL; } static HRESULT WINAPI transform_GetInputStreamAttributes(IMFTransform *iface, DWORD id, IMFAttributes **attributes) { TRACE("iface %p, id %#lx, attributes %p.\n", iface, id, attributes); return E_NOTIMPL; } static HRESULT WINAPI transform_GetOutputStreamAttributes(IMFTransform *iface, DWORD id, IMFAttributes **attributes) { TRACE("iface %p, id %#lx, attributes %p.\n", iface, id, attributes); return E_NOTIMPL; } static HRESULT WINAPI transform_DeleteInputStream(IMFTransform *iface, DWORD id) { TRACE("iface %p, id %#lx.\n", iface, id); return E_NOTIMPL; } static HRESULT WINAPI transform_AddInputStreams(IMFTransform *iface, DWORD streams, DWORD *ids) { TRACE("iface %p, streams %lu, ids %p.\n", iface, streams, ids); return E_NOTIMPL; } static HRESULT WINAPI transform_GetInputAvailableType(IMFTransform *iface, DWORD id, DWORD index, IMFMediaType **type) { IMFMediaType *media_type; const GUID *subtype; HRESULT hr; TRACE("iface %p, id %#lx, index %#lx, type %p.\n", iface, id, index, type); if (id) return MF_E_INVALIDSTREAMNUMBER; *type = NULL; if (index >= ARRAY_SIZE(aac_decoder_input_types)) return MF_E_NO_MORE_TYPES; subtype = aac_decoder_input_types[index].guid; if (FAILED(hr = MFCreateMediaType(&media_type))) return hr; if (FAILED(hr = IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Audio))) goto done; if (FAILED(hr = IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, subtype))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_BITS_PER_SAMPLE, 32))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, 6))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_BLOCK_ALIGNMENT, 24))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, 48000))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 1152000))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_PREFER_WAVEFORMATEX, 1))) goto done; if (IsEqualGUID(subtype, &MFAudioFormat_AAC)) { if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 0))) goto done; if (aac_decoder_input_types[index].payload_type != -1 && FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE, aac_decoder_input_types[index].payload_type))) goto done; } done: if (SUCCEEDED(hr)) IMFMediaType_AddRef((*type = media_type)); IMFMediaType_Release(media_type); return hr; } static HRESULT WINAPI transform_GetOutputAvailableType(IMFTransform *iface, DWORD id, DWORD index, IMFMediaType **type) { UINT32 channel_count, sample_size, sample_rate, block_alignment; struct aac_decoder *decoder = impl_from_IMFTransform(iface); IMFMediaType *media_type; const GUID *output_type; HRESULT hr; TRACE("iface %p, id %#lx, index %#lx, type %p.\n", iface, id, index, type); if (id) return MF_E_INVALIDSTREAMNUMBER; if (!decoder->input_type) return MF_E_TRANSFORM_TYPE_NOT_SET; *type = NULL; if (index >= ARRAY_SIZE(aac_decoder_output_types)) return MF_E_NO_MORE_TYPES; index = ARRAY_SIZE(aac_decoder_output_types) - index - 1; output_type = aac_decoder_output_types[index]; if (FAILED(hr = MFCreateMediaType(&media_type))) return hr; if (FAILED(hr = IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Audio))) goto done; if (FAILED(hr = IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, output_type))) goto done; if (IsEqualGUID(output_type, &MFAudioFormat_Float)) sample_size = 32; else if (IsEqualGUID(output_type, &MFAudioFormat_PCM)) sample_size = 16; else { FIXME("Subtype %s not implemented!\n", debugstr_guid(output_type)); hr = E_NOTIMPL; goto done; } if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_BITS_PER_SAMPLE, sample_size))) goto done; if (FAILED(hr = IMFMediaType_GetUINT32(decoder->input_type, &MF_MT_AUDIO_NUM_CHANNELS, &channel_count))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, channel_count))) goto done; if (FAILED(hr = IMFMediaType_GetUINT32(decoder->input_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &sample_rate))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, sample_rate))) goto done; block_alignment = sample_size * channel_count / 8; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_BLOCK_ALIGNMENT, block_alignment))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, sample_rate * block_alignment))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_ALL_SAMPLES_INDEPENDENT, 1))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_FIXED_SIZE_SAMPLES, 1))) goto done; if (FAILED(hr = IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_PREFER_WAVEFORMATEX, 1))) goto done; done: if (SUCCEEDED(hr)) IMFMediaType_AddRef((*type = media_type)); IMFMediaType_Release(media_type); return hr; } static HRESULT WINAPI transform_SetInputType(IMFTransform *iface, DWORD id, IMFMediaType *type, DWORD flags) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); MF_ATTRIBUTE_TYPE item_type; GUID major, subtype; HRESULT hr; ULONG i; TRACE("iface %p, id %#lx, type %p, flags %#lx.\n", iface, id, type, flags); if (id) return MF_E_INVALIDSTREAMNUMBER; if (FAILED(hr = IMFMediaType_GetGUID(type, &MF_MT_MAJOR_TYPE, &major))) return E_INVALIDARG; if (!IsEqualGUID(&major, &MFMediaType_Audio) || FAILED(hr = IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype))) return MF_E_INVALIDMEDIATYPE; for (i = 0; i < ARRAY_SIZE(aac_decoder_input_types); ++i) if (IsEqualGUID(&subtype, aac_decoder_input_types[i].guid)) break; if (i == ARRAY_SIZE(aac_decoder_input_types)) return MF_E_INVALIDMEDIATYPE; if (FAILED(IMFMediaType_GetItemType(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &item_type)) || item_type != MF_ATTRIBUTE_UINT32) return MF_E_INVALIDMEDIATYPE; if (FAILED(IMFMediaType_GetItemType(type, &MF_MT_USER_DATA, &item_type)) || item_type != MF_ATTRIBUTE_BLOB) return MF_E_INVALIDMEDIATYPE; if (flags & MFT_SET_TYPE_TEST_ONLY) return S_OK; if (!decoder->input_type && FAILED(hr = MFCreateMediaType(&decoder->input_type))) return hr; if (decoder->output_type) { IMFMediaType_Release(decoder->output_type); decoder->output_type = NULL; } return IMFMediaType_CopyAllItems(type, (IMFAttributes *)decoder->input_type); } static HRESULT WINAPI transform_SetOutputType(IMFTransform *iface, DWORD id, IMFMediaType *type, DWORD flags) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); MF_ATTRIBUTE_TYPE item_type; GUID major, subtype; HRESULT hr; ULONG i; TRACE("iface %p, id %#lx, type %p, flags %#lx.\n", iface, id, type, flags); if (id) return MF_E_INVALIDSTREAMNUMBER; if (!decoder->input_type) return MF_E_TRANSFORM_TYPE_NOT_SET; if (FAILED(hr = IMFMediaType_GetGUID(type, &MF_MT_MAJOR_TYPE, &major)) || FAILED(hr = IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype))) return hr; if (!IsEqualGUID(&major, &MFMediaType_Audio)) return MF_E_INVALIDMEDIATYPE; for (i = 0; i < ARRAY_SIZE(aac_decoder_output_types); ++i) if (IsEqualGUID(&subtype, aac_decoder_output_types[i])) break; if (i == ARRAY_SIZE(aac_decoder_output_types)) return MF_E_INVALIDMEDIATYPE; if (FAILED(IMFMediaType_GetItemType(type, &MF_MT_AUDIO_BITS_PER_SAMPLE, &item_type)) || item_type != MF_ATTRIBUTE_UINT32) return MF_E_INVALIDMEDIATYPE; if (FAILED(IMFMediaType_GetItemType(type, &MF_MT_AUDIO_NUM_CHANNELS, &item_type)) || item_type != MF_ATTRIBUTE_UINT32) return MF_E_INVALIDMEDIATYPE; if (FAILED(IMFMediaType_GetItemType(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &item_type)) || item_type != MF_ATTRIBUTE_UINT32) return MF_E_INVALIDMEDIATYPE; if (flags & MFT_SET_TYPE_TEST_ONLY) return S_OK; if (!decoder->output_type && FAILED(hr = MFCreateMediaType(&decoder->output_type))) return hr; if (FAILED(hr = IMFMediaType_CopyAllItems(type, (IMFAttributes *)decoder->output_type))) return hr; if (FAILED(hr = try_create_wg_transform(decoder))) goto failed; return S_OK; failed: IMFMediaType_Release(decoder->output_type); decoder->output_type = NULL; return hr; } static HRESULT WINAPI transform_GetInputCurrentType(IMFTransform *iface, DWORD id, IMFMediaType **out) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); IMFMediaType *type; HRESULT hr; TRACE("iface %p, id %#lx, out %p.\n", iface, id, out); if (id) return MF_E_INVALIDSTREAMNUMBER; if (!decoder->input_type) return MF_E_TRANSFORM_TYPE_NOT_SET; if (FAILED(hr = MFCreateMediaType(&type))) return hr; if (SUCCEEDED(hr = IMFMediaType_CopyAllItems(decoder->input_type, (IMFAttributes *)type))) IMFMediaType_AddRef(*out = type); IMFMediaType_Release(type); return hr; } static HRESULT WINAPI transform_GetOutputCurrentType(IMFTransform *iface, DWORD id, IMFMediaType **out) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); IMFMediaType *type; HRESULT hr; TRACE("iface %p, id %#lx, out %p.\n", iface, id, out); if (id) return MF_E_INVALIDSTREAMNUMBER; if (!decoder->output_type) return MF_E_TRANSFORM_TYPE_NOT_SET; if (FAILED(hr = MFCreateMediaType(&type))) return hr; if (SUCCEEDED(hr = IMFMediaType_CopyAllItems(decoder->output_type, (IMFAttributes *)type))) IMFMediaType_AddRef(*out = type); IMFMediaType_Release(type); return hr; } static HRESULT WINAPI transform_GetInputStatus(IMFTransform *iface, DWORD id, DWORD *flags) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); bool accepts_input; HRESULT hr; TRACE("iface %p, id %#lx, flags %p.\n", iface, id, flags); if (!decoder->wg_transform) return MF_E_TRANSFORM_TYPE_NOT_SET; if (FAILED(hr = wg_transform_get_status(decoder->wg_transform, &accepts_input))) return hr; *flags = accepts_input ? MFT_INPUT_STATUS_ACCEPT_DATA : 0; return S_OK; } static HRESULT WINAPI transform_GetOutputStatus(IMFTransform *iface, DWORD *flags) { FIXME("iface %p, flags %p stub!\n", iface, flags); return E_NOTIMPL; } static HRESULT WINAPI transform_SetOutputBounds(IMFTransform *iface, LONGLONG lower, LONGLONG upper) { TRACE("iface %p, lower %I64d, upper %I64d.\n", iface, lower, upper); return E_NOTIMPL; } static HRESULT WINAPI transform_ProcessEvent(IMFTransform *iface, DWORD id, IMFMediaEvent *event) { FIXME("iface %p, id %#lx, event %p stub!\n", iface, id, event); return E_NOTIMPL; } static HRESULT WINAPI transform_ProcessMessage(IMFTransform *iface, MFT_MESSAGE_TYPE message, ULONG_PTR param) { FIXME("iface %p, message %#x, param %p stub!\n", iface, message, (void *)param); return S_OK; } static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFSample *sample, DWORD flags) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); TRACE("iface %p, id %#lx, sample %p, flags %#lx.\n", iface, id, sample, flags); if (!decoder->wg_transform) return MF_E_TRANSFORM_TYPE_NOT_SET; return wg_transform_push_mf(decoder->wg_transform, sample, decoder->wg_sample_queue); } static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, DWORD count, MFT_OUTPUT_DATA_BUFFER *samples, DWORD *status) { struct aac_decoder *decoder = impl_from_IMFTransform(iface); MFT_OUTPUT_STREAM_INFO info; HRESULT hr; TRACE("iface %p, flags %#lx, count %lu, samples %p, status %p.\n", iface, flags, count, samples, status); if (count != 1) return E_INVALIDARG; if (!decoder->wg_transform) return MF_E_TRANSFORM_TYPE_NOT_SET; *status = samples->dwStatus = 0; if (!samples->pSample) return E_INVALIDARG; if (FAILED(hr = IMFTransform_GetOutputStreamInfo(iface, 0, &info))) return hr; if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, samples->pSample, info.cbSize, NULL, &samples->dwStatus))) wg_sample_queue_flush(decoder->wg_sample_queue, false); else samples->dwStatus = MFT_OUTPUT_DATA_BUFFER_NO_SAMPLE; return hr; } static const IMFTransformVtbl transform_vtbl = { transform_QueryInterface, transform_AddRef, transform_Release, transform_GetStreamLimits, transform_GetStreamCount, transform_GetStreamIDs, transform_GetInputStreamInfo, transform_GetOutputStreamInfo, transform_GetAttributes, transform_GetInputStreamAttributes, transform_GetOutputStreamAttributes, transform_DeleteInputStream, transform_AddInputStreams, transform_GetInputAvailableType, transform_GetOutputAvailableType, transform_SetInputType, transform_SetOutputType, transform_GetInputCurrentType, transform_GetOutputCurrentType, transform_GetInputStatus, transform_GetOutputStatus, transform_SetOutputBounds, transform_ProcessEvent, transform_ProcessMessage, transform_ProcessInput, transform_ProcessOutput, }; HRESULT aac_decoder_create(REFIID riid, void **ret) { static const struct wg_format output_format = { .major_type = WG_MAJOR_TYPE_AUDIO, .u.audio = { .format = WG_AUDIO_FORMAT_F32LE, .channel_mask = 1, .channels = 1, .rate = 44100, }, }; static const struct wg_format input_format = {.major_type = WG_MAJOR_TYPE_AUDIO_MPEG4}; struct wg_transform *transform; struct aac_decoder *decoder; HRESULT hr; TRACE("riid %s, ret %p.\n", debugstr_guid(riid), ret); if (!(transform = wg_transform_create(&input_format, &output_format))) { ERR_(winediag)("GStreamer doesn't support WMA decoding, please install appropriate plugins\n"); return E_FAIL; } wg_transform_destroy(transform); if (!(decoder = calloc(1, sizeof(*decoder)))) return E_OUTOFMEMORY; if (FAILED(hr = wg_sample_queue_create(&decoder->wg_sample_queue))) { free(decoder); return hr; } decoder->IMFTransform_iface.lpVtbl = &transform_vtbl; decoder->refcount = 1; *ret = &decoder->IMFTransform_iface; TRACE("Created decoder %p\n", *ret); return S_OK; }