From d41020270980c5858d4ef8fd0f42611a8afc7139 Mon Sep 17 00:00:00 2001 From: Elysia <71698422+aiko-chan-ai@users.noreply.github.com> Date: Tue, 29 Oct 2024 14:19:21 +0700 Subject: [PATCH] feat(VoiceReceiver): Recording video with audio Full implemented --- examples/VoiceChannel/RecordingVideo.js | 4 ++- .../voice/dispatcher/AudioDispatcher.js | 3 +- src/client/voice/networking/VoiceUDPClient.js | 10 +----- src/client/voice/receiver/FFmpegHandler.js | 27 +++++++++++--- src/client/voice/receiver/PacketHandler.js | 22 ++++++++++-- src/client/voice/receiver/Receiver.js | 13 ++++--- src/util/Util.js | 36 ++++++++----------- typings/index.d.ts | 10 ++++-- 8 files changed, 76 insertions(+), 49 deletions(-) diff --git a/examples/VoiceChannel/RecordingVideo.js b/examples/VoiceChannel/RecordingVideo.js index e50c1cc..0434366 100644 --- a/examples/VoiceChannel/RecordingVideo.js +++ b/examples/VoiceChannel/RecordingVideo.js @@ -29,7 +29,9 @@ client.on('ready', async client => { const video = connectionStream.receiver.createVideoStream('user_id', { portUdp: 5004, - output: fs.createWriteStream('video.ts'), // Output file using MPEG-TS container + output: fs.createWriteStream('video.mkv'), // Output file using matroska container + // If you want video with audio, set isEnableAudio to true + isEnableAudio: false, }); video.stream.stderr.on('data', data => { diff --git a/src/client/voice/dispatcher/AudioDispatcher.js b/src/client/voice/dispatcher/AudioDispatcher.js index de47407..40c487a 100644 --- a/src/client/voice/dispatcher/AudioDispatcher.js +++ b/src/client/voice/dispatcher/AudioDispatcher.js @@ -1,6 +1,7 @@ 'use strict'; const BaseDispatcher = require('./BaseDispatcher'); +const Util = require('../../../util/Util'); const Silence = require('../util/Silence'); const VolumeInterface = require('../util/VolumeInterface'); @@ -24,7 +25,7 @@ const VolumeInterface = require('../util/VolumeInterface'); class AudioDispatcher extends BaseDispatcher { constructor(player, { seek = 0, volume = 1, fec, plp, bitrate = 96, highWaterMark = 12 } = {}, streams) { const streamOptions = { seek, volume, fec, plp, bitrate, highWaterMark }; - super(player, highWaterMark, 120, false, streams); + super(player, highWaterMark, Util.getPayloadType('opus'), false, streams); this.streamOptions = streamOptions; diff --git a/src/client/voice/networking/VoiceUDPClient.js b/src/client/voice/networking/VoiceUDPClient.js index c437548..dd8f238 100644 --- a/src/client/voice/networking/VoiceUDPClient.js +++ b/src/client/voice/networking/VoiceUDPClient.js @@ -126,15 +126,7 @@ class VoiceConnectionUDPClient extends EventEmitter { op: VoiceOpcodes.SELECT_PROTOCOL, d: { protocol: 'udp', - codecs: [ - { - name: 'opus', - type: 'audio', - priority: 1000, - payload_type: 120, - }, - ...Util.getAllPayloadType(), - ], + codecs: Util.getAllPayloadType(), data: { address: packet.address, port: packet.port, diff --git a/src/client/voice/receiver/FFmpegHandler.js b/src/client/voice/receiver/FFmpegHandler.js index 0d0b664..4bda34e 100644 --- a/src/client/voice/receiver/FFmpegHandler.js +++ b/src/client/voice/receiver/FFmpegHandler.js @@ -15,9 +15,15 @@ const { StreamOutput } = require('../util/Socket'); * @extends {EventEmitter} */ class FFmpegHandler extends EventEmitter { - constructor(codec, portUdp, output) { + constructor(codec, portUdp, output, isEnableAudio) { super(); + /** + * If the audio is enabled + * @type {boolean} + */ + this.isEnableAudio = isEnableAudio; + /** * The codec of the stream * @type {VideoCodec} @@ -41,7 +47,8 @@ class FFmpegHandler extends EventEmitter { */ this.output = output; - const sdpData = Util.getSDPCodecName(codec, portUdp); + const sdpData = Util.getSDPCodecName(portUdp, this.isEnableAudio); + /** * The FFmpeg process is ready or not * @type {boolean} @@ -70,8 +77,8 @@ class FFmpegHandler extends EventEmitter { '-max_delay', '500000', '-y', - '-f', // Specify the format - 'mpegts', // MKV format + '-f', + 'matroska', isStream ? this.outputStream.url : output, ]); @@ -88,14 +95,17 @@ class FFmpegHandler extends EventEmitter { this.emit('ready'); }); this.socket = createSocket('udp4'); + this.socketAudio = createSocket('udp4'); } /** * Send a payload to FFmpeg via UDP * @param {Buffer} payload The payload + * @param {boolean} isAudio If the payload is audio * @param {*} callback Callback */ sendPayloadToFFmpeg( payload, + isAudio = false, callback = e => { if (e) { console.error('Error sending packet:', e); @@ -103,7 +113,14 @@ class FFmpegHandler extends EventEmitter { }, ) { const message = Buffer.from(payload); - this.socket.send(message, 0, message.length, this.portUdp, '127.0.0.1', callback); + if (isAudio && !this.isEnableAudio) { + return; + } + if (isAudio) { + this.socketAudio.send(message, 0, message.length, this.portUdp + 2, '127.0.0.1', callback); + } else { + this.socket.send(message, 0, message.length, this.portUdp, '127.0.0.1', callback); + } } destroy() { diff --git a/src/client/voice/receiver/PacketHandler.js b/src/client/voice/receiver/PacketHandler.js index 7e1e741..0e133f7 100644 --- a/src/client/voice/receiver/PacketHandler.js +++ b/src/client/voice/receiver/PacketHandler.js @@ -56,9 +56,9 @@ class PacketHandler extends EventEmitter { return stream; } - makeVideoStream(user, portUdp, codec = 'H264', output) { + makeVideoStream(user, portUdp, codec, output, isEnableAudio = false) { if (this.videoStreams.has(user)) return this.videoStreams.get(user); - const stream = new FFmpegHandler(codec, portUdp, output); + const stream = new FFmpegHandler(codec, portUdp, output, isEnableAudio); stream.on('ready', () => { this.videoStreams.set(user, stream); }); @@ -186,6 +186,21 @@ class PacketHandler extends EventEmitter { } } + audioReceiverForStream(buffer) { + const ssrc = buffer.readUInt32BE(8); + const userStat = this.connection.ssrcMap.get(ssrc); // Audio_ssrc + if (!userStat) return; + const streamInfo = this.videoStreams.get(userStat.userId); + if (!streamInfo) return; + const packet = this.parseBuffer(buffer, true); + if (packet instanceof Error) { + return; + } + if (streamInfo.isEnableAudio) { + streamInfo.sendPayloadToFFmpeg(Buffer.concat(packet), true); + } + } + videoReceiver(buffer) { const ssrc = buffer.readUInt32BE(8); const userStat = this.connection.ssrcMap.get(ssrc - 1); // Video_ssrc @@ -203,7 +218,7 @@ class PacketHandler extends EventEmitter { // If this is a silence frame, pretend we never received it return; } - this.receiver.emit('videoData', ssrc, userStat, header, videoPacket); + this.receiver.emit('videoData', ssrc - 1, userStat, header, videoPacket); if (streamInfo) { streamInfo.sendPayloadToFFmpeg(Buffer.concat(packet)); @@ -214,6 +229,7 @@ class PacketHandler extends EventEmitter { push(buffer) { this.audioReceiver(buffer); this.videoReceiver(buffer); + this.audioReceiverForStream(buffer); } } diff --git a/src/client/voice/receiver/Receiver.js b/src/client/voice/receiver/Receiver.js index 9f61384..4b13765 100644 --- a/src/client/voice/receiver/Receiver.js +++ b/src/client/voice/receiver/Receiver.js @@ -57,10 +57,10 @@ class VoiceReceiver extends EventEmitter { /** * Options passed to `VoiceReceiver#createVideoStream`. * @typedef {Object} ReceiveVideoStreamOptions - * @property {number} [portUdp] The UDP port to use for the video stream (local stream). - * @property {string} [codec='H264'] The codec to use for encoding the video. Default is 'H264'. - * H265 supported, but not implemented - * @property {any} [output] Additional output options, as required. + * @property {number} portUdp The UDP port to use for the video stream (local stream). + * @property {WritableStream|string} output Output stream or file path to write the video stream to. + * @property {boolean} [isEnableAudio=false] Enable audio for the video stream. + * If you intend to record the stream with audio, make sure that `portUdp` and `portUdp + 2` are not in use. */ /** @@ -71,11 +71,10 @@ class VoiceReceiver extends EventEmitter { * @param {ReceiveVideoStreamOptions} options Options. * @returns {FFmpegHandler} The video stream for the specified user. */ - createVideoStream(user, { portUdp, codec, output } = {}) { + createVideoStream(user, { portUdp, output, isEnableAudio = false } = {}) { user = this.connection.client.users.resolve(user); if (!user) throw new Error('VOICE_USER_MISSING'); - codec = 'H264'; - const stream = this.packets.makeVideoStream(user.id, portUdp, codec, output); + const stream = this.packets.makeVideoStream(user.id, portUdp, 'H264', output, isEnableAudio); return stream; } diff --git a/src/util/Util.js b/src/util/Util.js index 11ffb26..b2430c2 100644 --- a/src/util/Util.js +++ b/src/util/Util.js @@ -944,23 +944,23 @@ class Util extends null { return payloadTypes.find(p => p.name === codecName).payload_type; } - static getSDPCodecName(packet, portUdp) { - let payload, payloadType; - if (typeof packet === 'string') { - payload = payloadTypes.find(p => p.name === packet); - payloadType = payload.payload_type; - } else { - const payloadType = packet[1] > 120 ? packet[1] & 0x80 : packet[1]; - payload = payloadTypes.find(p => p.payload_type === payloadType); - } - let sdpData = `o=- 0 0 IN IP4 127.0.0.1 -s=No Name -c=IN IP4 127.0.0.1 + static getSDPCodecName(portUdp, isEnableAudio) { + let sdpData = `v=0 +o=- 0 0 IN IP4 0.0.0.0 +s=- +c=IN IP4 0.0.0.0 t=0 0 a=tool:libavformat 61.1.100 -m=video ${portUdp} RTP/AVP ${payloadType} -a=rtpmap:${payloadType} ${payload.name}/90000 -#Placeholder +m=video ${portUdp} RTP/AVP 105 +a=rtpmap:105 H264/90000 +a=fmtp:105 profile-level-id=42e01f;sprop-parameter-sets=Z0IAH6tAoAt2AtwEBAaQeJEV,aM4JyA==;packetization-mode=1 +${ + isEnableAudio + ? `m=audio ${portUdp + 2} RTP/AVP 120 +a=rtpmap:120 opus/48000/2 +a=fmtp:120 minptime=10;useinbandfec=1` + : '' +} a=extmap:1 urn:ietf:params:rtp-hdrext:ssrc-audio-level a=extmap:2 http://www.webrtc.org/experiments/rtp-hdrext/abs-send-time a=extmap:3 http://www.ietf.org/id/draft-holmer-rmcat-transport-wide-cc-extensions-01 @@ -974,12 +974,6 @@ a=extmap:11 urn:ietf:params:rtp-hdrext:sdes:repaired-rtp-stream-id a=extmap:13 urn:3gpp:video-orientation a=extmap:14 urn:ietf:params:rtp-hdrext:toffset `; - if (payload.name === 'H264') { - sdpData = sdpData.replace( - '#Placeholder', - `a=fmtp:${payloadType} profile-level-id=42e01f;sprop-parameter-sets=Z0IAH6tAoAt2AtwEBAaQeJEV,aM4JyA==;packetization-mode=1`, - ); - } return sdpData; } } diff --git a/typings/index.d.ts b/typings/index.d.ts index 619ee13..684071c 100644 --- a/typings/index.d.ts +++ b/typings/index.d.ts @@ -1105,8 +1105,10 @@ export class FFmpegHandler extends EventEmitter { public ready: boolean; public stream: ChildProcessWithoutNullStreams; public socket: Socket; + public socketAudio: Socket; public output: Writable | string; - public sendPayloadToFFmpeg(payload: Buffer): void; + public isEnableAudio: boolean; + public sendPayloadToFFmpeg(payload: Buffer, isAudio?: boolean): void; public on(event: 'ready', listener: () => void): this; public once(event: 'ready', listener: () => void): this; public destroy(): void; @@ -1117,7 +1119,11 @@ export class VoiceReceiver extends EventEmitter { public createStream(user: UserResolvable, options?: { mode?: 'opus' | 'pcm'; end?: 'silence' | 'manual' }): Readable; public createVideoStream( user: UserResolvable, - options?: { portUdp: number; codec: 'H264'; output: Writable | string }, + options?: { + portUdp: number; + output: Writable | string; + isEnableAudio: boolean; + }, ): FFmpegHandler; public on(event: 'debug', listener: (error: Error | string) => void): this;