feat: try implement djs voice + video (v12)

2024-07-24 19:27:50 +07:00
parent 7fa4666df0
commit 26aa85c126
31 changed files with 3768 additions and 9 deletions
--- a/src/client/voice/receiver/PacketHandler.js
+++ b/src/client/voice/receiver/PacketHandler.js
@@ -0,0 +1,136 @@
+'use strict';
+
+const EventEmitter = require('events');
+const { Buffer } = require('node:buffer');
+const { setTimeout } = require('node:timers');
+const Speaking = require('../../../util/Speaking');
+const secretbox = require('../util/Secretbox');
+const { SILENCE_FRAME } = require('../util/Silence');
+
+// How long (in milliseconds) to wait after a user's latest packet before they are considered to have stopped speaking.
+// Used as the delay of the per-SSRC timeout that is refreshed on every received packet.
+// https://github.com/discordjs/discord.js/issues/3524#issuecomment-540373200
+const DISCORD_SPEAKING_DELAY = 250;
+
+class Readable extends require('stream').Readable {
+  _read() {} // eslint-disable-line no-empty-function
+}
+
+class PacketHandler extends EventEmitter {
+  constructor(receiver) {
+    super();
+    this.nonce = Buffer.alloc(24);
+    this.receiver = receiver;
+    this.streams = new Map();
+    this.speakingTimeouts = new Map();
+  }
+
+  get connection() {
+    return this.receiver.connection;
+  }
+
+  _stoppedSpeaking(userId) {
+    const streamInfo = this.streams.get(userId);
+    if (streamInfo && streamInfo.end === 'silence') {
+      this.streams.delete(userId);
+      streamInfo.stream.push(null);
+    }
+  }
+
+  makeStream(user, end) {
+    if (this.streams.has(user)) return this.streams.get(user).stream;
+    const stream = new Readable();
+    stream.on('end', () => this.streams.delete(user));
+    this.streams.set(user, { stream, end });
+    return stream;
+  }
+
+  parseBuffer(buffer) {
+    const { secret_key, mode } = this.receiver.connection.authentication;
+
+    // Choose correct nonce depending on encryption
+    let end;
+    if (mode === 'xsalsa20_poly1305_lite') {
+      buffer.copy(this.nonce, 0, buffer.length - 4);
+      end = buffer.length - 4;
+    } else if (mode === 'xsalsa20_poly1305_suffix') {
+      buffer.copy(this.nonce, 0, buffer.length - 24);
+      end = buffer.length - 24;
+    } else {
+      buffer.copy(this.nonce, 0, 0, 12);
+    }
+
+    // Open packet
+    let packet = secretbox.methods.open(buffer.slice(12, end), this.nonce, secret_key);
+    if (!packet) return new Error('Failed to decrypt voice packet');
+    packet = Buffer.from(packet);
+
+    // Strip RTP Header Extensions (one-byte only)
+    if (packet[0] === 0xbe && packet[1] === 0xde) {
+      const headerExtensionLength = packet.readUInt16BE(2);
+      packet = packet.subarray(4 + 4 * headerExtensionLength);
+    }
+
+    return packet;
+  }
+
+  push(buffer) {
+    const ssrc = buffer.readUInt32BE(8);
+    const userStat = this.connection.ssrcMap.get(ssrc);
+
+    if (!userStat) return;
+
+    let opusPacket;
+    const streamInfo = this.streams.get(userStat.userId);
+    // If the user is in video, we need to check if the packet is just silence
+    if (userStat.hasVideo) {
+      opusPacket = this.parseBuffer(buffer);
+      if (opusPacket instanceof Error) {
+        // Only emit an error if we were actively receiving packets from this user
+        if (streamInfo) {
+          this.emit('error', opusPacket);
+          return;
+        }
+      }
+      if (SILENCE_FRAME.equals(opusPacket)) {
+        // If this is a silence frame, pretend we never received it
+        return;
+      }
+    }
+
+    let speakingTimeout = this.speakingTimeouts.get(ssrc);
+    if (typeof speakingTimeout === 'undefined') {
+      // Ensure at least the speaking bit is set.
+      // As the object is by reference, it's only needed once per client re-connect.
+      if (userStat.speaking === 0) {
+        userStat.speaking = Speaking.FLAGS.SPEAKING;
+      }
+      this.connection.onSpeaking({ user_id: userStat.userId, ssrc: ssrc, speaking: userStat.speaking });
+      speakingTimeout = setTimeout(() => {
+        try {
+          this.connection.onSpeaking({ user_id: userStat.userId, ssrc: ssrc, speaking: 0 });
+          clearTimeout(speakingTimeout);
+          this.speakingTimeouts.delete(ssrc);
+        } catch {
+          // Connection already closed, ignore
+        }
+      }, DISCORD_SPEAKING_DELAY).unref();
+      this.speakingTimeouts.set(ssrc, speakingTimeout);
+    } else {
+      speakingTimeout.refresh();
+    }
+
+    if (streamInfo) {
+      const { stream } = streamInfo;
+      if (!opusPacket) {
+        opusPacket = this.parseBuffer(buffer);
+        if (opusPacket instanceof Error) {
+          this.emit('error', opusPacket);
+          return;
+        }
+      }
+      stream.push(opusPacket);
+    }
+  }
+}
+
+module.exports = PacketHandler;
--- a/src/client/voice/receiver/Receiver.js
+++ b/src/client/voice/receiver/Receiver.js
@@ -0,0 +1,58 @@
+'use strict';
+
+const EventEmitter = require('events');
+const prism = require('prism-media');
+const PacketHandler = require('./PacketHandler');
+const { Error } = require('../../../errors');
+
+/**
+ * Receives audio packets from a voice connection.
+ * @example
+ * const receiver = connection.createReceiver();
+ * // opusStream is a ReadableStream - that means you could play it back to a voice channel if you wanted to!
+ * const opusStream = receiver.createStream(user);
+ */
+class VoiceReceiver extends EventEmitter {
+  constructor(connection) {
+    super();
+    this.connection = connection;
+    this.packets = new PacketHandler(this);
+    /**
+     * Emitted whenever there is a warning
+     * @event VoiceReceiver#debug
+     * @param {Error|string} error The error or message to debug
+     */
+    this.packets.on('error', err => this.emit('debug', err));
+  }
+
+  /**
+   * Options passed to `VoiceReceiver#createStream`.
+   * @typedef {Object} ReceiveStreamOptions
+   * @property {string} [mode='opus'] The mode for audio output. This defaults to opus, meaning discord.js won't decode
+   * the packets for you. You can set this to 'pcm' so that the stream's output will be 16-bit little-endian stereo
+   * audio
+   * @property {string} [end='silence'] When the stream should be destroyed. If `silence`, this will be when the user
+   * stops talking. Otherwise, if `manual`, this should be handled by you.
+   */
+
+  /**
+   * Creates a new audio receiving stream. If a stream already exists for a user, then that stream will be returned
+   * rather than generating a new one.
+   * @param {UserResolvable} user The user to start listening to.
+   * @param {ReceiveStreamOptions} options Options.
+   * @returns {ReadableStream}
+   */
+  createStream(user, { mode = 'opus', end = 'silence' } = {}) {
+    user = this.connection.client.users.resolve(user);
+    if (!user) throw new Error('VOICE_USER_MISSING');
+    const stream = this.packets.makeStream(user.id, end);
+    if (mode === 'pcm') {
+      const decoder = new prism.opus.Decoder({ channels: 2, rate: 48000, frameSize: 960 });
+      stream.pipe(decoder);
+      return decoder;
+    }
+    return stream;
+  }
+}
+
+module.exports = VoiceReceiver;