voice.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683
  1. // Discordgo - Discord bindings for Go
  2. // Available at https://github.com/bwmarrin/discordgo
  3. // Copyright 2015-2016 Bruce Marriner <bruce@sqls.net>. All rights reserved.
  4. // Use of this source code is governed by a BSD-style
  5. // license that can be found in the LICENSE file.
  6. // This file contains code related to Discord voice support
  7. package discordgo
  8. import (
  9. "encoding/binary"
  10. "encoding/json"
  11. "fmt"
  12. "net"
  13. "runtime"
  14. "strings"
  15. "sync"
  16. "time"
  17. "github.com/gorilla/websocket"
  18. "golang.org/x/crypto/nacl/secretbox"
  19. )
  20. // ------------------------------------------------------------------------------------------------
  21. // Code related to both VoiceConnection Websocket and UDP connections.
  22. // ------------------------------------------------------------------------------------------------
  23. // A VoiceConnectionConnection struct holds all the data and functions related to a Discord Voice Connection.
  24. type VoiceConnection struct {
  25. sync.Mutex
  26. Debug bool // If true, print extra logging
  27. Ready bool // If true, voice is ready to send/receive audio
  28. GuildID string
  29. ChannelID string
  30. UserID string
  31. OpusSend chan []byte // Chan for sending opus audio
  32. OpusRecv chan *Packet // Chan for receiving opus audio
  33. Receive bool // If false, don't try to receive packets
  34. OP2 *voiceOP2 // exported for dgvoice, may change.
  35. // FrameRate int // This can be used to set the FrameRate of Opus data
  36. // FrameSize int // This can be used to set the FrameSize of Opus data
  37. wsConn *websocket.Conn
  38. UDPConn *net.UDPConn // this will become unexported soon.
  39. session *Session
  40. sessionID string
  41. token string
  42. endpoint string
  43. op4 voiceOP4
  44. // Used to send a close signal to goroutines
  45. close chan struct{}
  46. // Used to allow blocking until connected
  47. connected chan bool
  48. // Used to pass the sessionid from onVoiceStateUpdate
  49. sessionRecv chan string
  50. }
  51. // ------------------------------------------------------------------------------------------------
  52. // Code related to the VoiceConnection websocket connection
  53. // ------------------------------------------------------------------------------------------------
  // voiceOP4 is the payload of the voice operation 4 websocket event. It
  // delivers the 32 byte key used with NaCl SecretBox to encrypt and decrypt
  // voice packets, together with the encryption mode name.
  type voiceOP4 struct {
  	// SecretKey is the SecretBox key.
  	SecretKey [32]byte `json:"secret_key"`
  	// Mode is the encryption mode string sent by the server.
  	Mode string `json:"mode"`
  }
  // voiceOP2 is the payload of the voice operation 2 websocket event, which is
  // sort of like the voice READY packet. Its values drive the UDP handshake
  // and the websocket heartbeat.
  type voiceOP2 struct {
  	// SSRC is written into the UDP handshake and every outgoing RTP header.
  	SSRC uint32 `json:"ssrc"`
  	// Port is the UDP port of the voice server.
  	Port int `json:"port"`
  	// Modes lists the mode strings sent by the server.
  	Modes []string `json:"modes"`
  	// HeartbeatInterval is the raw number from the event; wsHeartbeat
  	// multiplies it by time.Millisecond.
  	HeartbeatInterval time.Duration `json:"heartbeat_interval"`
  }
  // voiceHandshakeData is the "d" body of the first message Open writes to
  // the voice websocket.
  type voiceHandshakeData struct {
  	// ServerID is filled with the VoiceConnection's GuildID.
  	ServerID string `json:"server_id"`
  	// UserID is filled with the VoiceConnection's UserID.
  	UserID string `json:"user_id"`
  	// SessionID is filled with the VoiceConnection's sessionID.
  	SessionID string `json:"session_id"`
  	// Token is filled with the VoiceConnection's token.
  	Token string `json:"token"`
  }
  74. type voiceHandshakeOp struct {
  75. Op int `json:"op"` // Always 0
  76. Data voiceHandshakeData `json:"d"`
  77. }
  78. // Open opens a voice connection. This should be called
  79. // after VoiceChannelJoin is used and the data VOICE websocket events
  80. // are captured.
  81. func (v *VoiceConnection) Open() (err error) {
  82. v.Lock()
  83. defer v.Unlock()
  84. // Don't open a websocket if one is already open
  85. if v.wsConn != nil {
  86. return
  87. }
  88. // Connect to VoiceConnection Websocket
  89. vg := fmt.Sprintf("wss://%s", strings.TrimSuffix(v.endpoint, ":80"))
  90. v.wsConn, _, err = websocket.DefaultDialer.Dial(vg, nil)
  91. if err != nil {
  92. fmt.Println("VOICE error opening websocket:", err)
  93. return
  94. }
  95. data := voiceHandshakeOp{0, voiceHandshakeData{v.GuildID, v.UserID, v.sessionID, v.token}}
  96. err = v.wsConn.WriteJSON(data)
  97. if err != nil {
  98. fmt.Println("VOICE error sending init packet:", err)
  99. return
  100. }
  101. // Start a listening for voice websocket events
  102. // TODO add a check here to make sure Listen worked by monitoring
  103. // a chan or bool?
  104. v.close = make(chan struct{})
  105. go v.wsListen(v.wsConn, v.close)
  106. return
  107. }
  108. // WaitUntilConnected is a TEMP FUNCTION
  109. // And this is going to be removed or changed entirely.
  110. // I know it looks hacky, but I needed to get it working quickly.
  111. func (v *VoiceConnection) WaitUntilConnected() error {
  112. i := 0
  113. for {
  114. if v.Ready {
  115. return nil
  116. }
  117. if i > 10 {
  118. return fmt.Errorf("Timeout waiting for voice.")
  119. }
  120. time.Sleep(1 * time.Second)
  121. i++
  122. }
  123. return nil
  124. }
  // voiceSpeakingData is the "d" body of the speaking notification sent by
  // Speaking.
  type voiceSpeakingData struct {
  	// Speaking is true while audio is being sent.
  	Speaking bool `json:"speaking"`
  	// Delay is always sent as 0 by Speaking.
  	Delay int `json:"delay"`
  }
  129. type voiceSpeakingOp struct {
  130. Op int `json:"op"` // Always 5
  131. Data voiceSpeakingData `json:"d"`
  132. }
  133. // Speaking sends a speaking notification to Discord over the voice websocket.
  134. // This must be sent as true prior to sending audio and should be set to false
  135. // once finished sending audio.
  136. // b : Send true if speaking, false if not.
  137. func (v *VoiceConnection) Speaking(b bool) (err error) {
  138. if v.wsConn == nil {
  139. return fmt.Errorf("No VoiceConnection websocket.")
  140. }
  141. data := voiceSpeakingOp{5, voiceSpeakingData{b, 0}}
  142. err = v.wsConn.WriteJSON(data)
  143. if err != nil {
  144. fmt.Println("Speaking() write json error:", err)
  145. return
  146. }
  147. return
  148. }
  149. // ChangeChannel sends Discord a request to change channels within a Guild
  150. // !!! NOTE !!! This function may be removed in favour of just using ChannelVoiceJoin
  151. func (v *VoiceConnection) ChangeChannel(channelID string, mute, deaf bool) (err error) {
  152. data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, &channelID, mute, deaf}}
  153. err = v.session.wsConn.WriteJSON(data)
  154. return
  155. }
  156. // Disconnect disconnects from this voice channel and closes the websocket
  157. // and udp connections to Discord.
  158. // !!! NOTE !!! this function may be removed in favour of ChannelVoiceLeave
  159. func (v *VoiceConnection) Disconnect() (err error) {
  160. // Send a OP4 with a nil channel to disconnect
  161. if v.sessionID != "" {
  162. data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}}
  163. err = v.session.wsConn.WriteJSON(data)
  164. v.sessionID = ""
  165. }
  166. // Close websocket and udp connections
  167. v.Close()
  168. delete(v.session.VoiceConnections, v.GuildID)
  169. return
  170. }
  171. // Close closes the voice ws and udp connections
  172. func (v *VoiceConnection) Close() {
  173. v.Lock()
  174. defer v.Unlock()
  175. v.Ready = false
  176. if v.close != nil {
  177. close(v.close)
  178. v.close = nil
  179. }
  180. if v.UDPConn != nil {
  181. err := v.UDPConn.Close()
  182. if err != nil {
  183. fmt.Println("error closing udp connection: ", err)
  184. }
  185. v.UDPConn = nil
  186. }
  187. if v.wsConn != nil {
  188. err := v.wsConn.Close()
  189. if err != nil {
  190. fmt.Println("error closing websocket connection: ", err)
  191. }
  192. v.wsConn = nil
  193. }
  194. }
  195. // ------------------------------------------------------------------------------------------------
  196. // Unexported Internal Functions Below.
  197. // ------------------------------------------------------------------------------------------------
  198. // wsListen listens on the voice websocket for messages and passes them
  199. // to the voice event handler. This is automatically called by the Open func
  200. func (v *VoiceConnection) wsListen(wsConn *websocket.Conn, close <-chan struct{}) {
  201. for {
  202. messageType, message, err := v.wsConn.ReadMessage()
  203. if err != nil {
  204. // TODO: add reconnect, matching wsapi.go:listen()
  205. // TODO: Handle this problem better.
  206. // TODO: needs proper logging
  207. fmt.Println("VoiceConnection Listen Error:", err)
  208. return
  209. }
  210. // Pass received message to voice event handler
  211. select {
  212. case <-close:
  213. return
  214. default:
  215. go v.wsEvent(messageType, message)
  216. }
  217. }
  218. }
  219. // wsEvent handles any voice websocket events. This is only called by the
  220. // wsListen() function.
  221. func (v *VoiceConnection) wsEvent(messageType int, message []byte) {
  222. if v.Debug {
  223. fmt.Println("wsEvent received: ", messageType)
  224. printJSON(message)
  225. }
  226. var e Event
  227. if err := json.Unmarshal(message, &e); err != nil {
  228. fmt.Println("wsEvent Unmarshall error: ", err)
  229. return
  230. }
  231. switch e.Operation {
  232. case 2: // READY
  233. v.OP2 = &voiceOP2{}
  234. if err := json.Unmarshal(e.RawData, v.OP2); err != nil {
  235. fmt.Println("voiceWS.onEvent OP2 Unmarshall error: ", err)
  236. printJSON(e.RawData) // TODO: Better error logging
  237. return
  238. }
  239. // Start the voice websocket heartbeat to keep the connection alive
  240. go v.wsHeartbeat(v.wsConn, v.close, v.OP2.HeartbeatInterval)
  241. // TODO monitor a chan/bool to verify this was successful
  242. // Start the UDP connection
  243. err := v.udpOpen()
  244. if err != nil {
  245. fmt.Println("Error opening udp connection: ", err)
  246. return
  247. }
  248. // Start the opusSender.
  249. // TODO: Should we allow 48000/960 values to be user defined?
  250. if v.OpusSend == nil {
  251. v.OpusSend = make(chan []byte, 2)
  252. }
  253. go v.opusSender(v.UDPConn, v.close, v.OpusSend, 48000, 960)
  254. // Start the opusReceiver
  255. if v.OpusRecv == nil {
  256. v.OpusRecv = make(chan *Packet, 2)
  257. }
  258. if v.Receive {
  259. go v.opusReceiver(v.UDPConn, v.close, v.OpusRecv)
  260. }
  261. // Send the ready event
  262. v.connected <- true
  263. return
  264. case 3: // HEARTBEAT response
  265. // add code to use this to track latency?
  266. return
  267. case 4: // udp encryption secret key
  268. v.op4 = voiceOP4{}
  269. if err := json.Unmarshal(e.RawData, &v.op4); err != nil {
  270. fmt.Println("voiceWS.onEvent OP4 Unmarshall error: ", err)
  271. printJSON(e.RawData)
  272. return
  273. }
  274. return
  275. case 5:
  276. // SPEAKING TRUE/FALSE NOTIFICATION
  277. /*
  278. {
  279. "user_id": "1238921738912",
  280. "ssrc": 2,
  281. "speaking": false
  282. }
  283. */
  284. default:
  285. fmt.Println("UNKNOWN VOICE OP: ", e.Operation)
  286. printJSON(e.RawData)
  287. }
  288. return
  289. }
  // voiceHeartbeatOp is the heartbeat message wsHeartbeat writes to the voice
  // websocket.
  type voiceHeartbeatOp struct {
  	// Op is always 3.
  	Op int `json:"op"`
  	// Data is filled with the current Unix time in seconds.
  	Data int `json:"d"`
  }
  294. // NOTE :: When a guild voice server changes how do we shut this down
  295. // properly, so a new connection can be setup without fuss?
  296. //
  297. // wsHeartbeat sends regular heartbeats to voice Discord so it knows the client
  298. // is still connected. If you do not send these heartbeats Discord will
  299. // disconnect the websocket connection after a few seconds.
  300. func (v *VoiceConnection) wsHeartbeat(wsConn *websocket.Conn, close <-chan struct{}, i time.Duration) {
  301. if close == nil || wsConn == nil {
  302. return
  303. }
  304. var err error
  305. ticker := time.NewTicker(i * time.Millisecond)
  306. for {
  307. err = wsConn.WriteJSON(voiceHeartbeatOp{3, int(time.Now().Unix())})
  308. if err != nil {
  309. fmt.Println("wsHeartbeat send error: ", err)
  310. return
  311. }
  312. select {
  313. case <-ticker.C:
  314. // continue loop and send heartbeat
  315. case <-close:
  316. return
  317. }
  318. }
  319. }
  320. // ------------------------------------------------------------------------------------------------
  321. // Code related to the VoiceConnection UDP connection
  322. // ------------------------------------------------------------------------------------------------
  // voiceUDPData is the innermost body of the message udpOpen sends to finish
  // the UDP handshake.
  type voiceUDPData struct {
  	// Address is the public IP of the machine running this code.
  	Address string `json:"address"`
  	// Port is the UDP port of the machine running this code.
  	Port uint16 `json:"port"`
  	// Mode is always "xsalsa20_poly1305".
  	Mode string `json:"mode"`
  }
  328. type voiceUDPD struct {
  329. Protocol string `json:"protocol"` // Always "udp" ?
  330. Data voiceUDPData `json:"data"`
  331. }
  332. type voiceUDPOp struct {
  333. Op int `json:"op"` // Always 1
  334. Data voiceUDPD `json:"d"`
  335. }
  336. // udpOpen opens a UDP connection to the voice server and completes the
  337. // initial required handshake. This connection is left open in the session
  338. // and can be used to send or receive audio. This should only be called
  339. // from voice.wsEvent OP2
  340. func (v *VoiceConnection) udpOpen() (err error) {
  341. v.Lock()
  342. defer v.Unlock()
  343. if v.wsConn == nil {
  344. return fmt.Errorf("nil voice websocket")
  345. }
  346. if v.UDPConn != nil {
  347. return fmt.Errorf("udp connection already open")
  348. }
  349. if v.close == nil {
  350. return fmt.Errorf("nil close channel")
  351. }
  352. if v.endpoint == "" {
  353. return fmt.Errorf("empty endpoint")
  354. }
  355. host := fmt.Sprintf("%s:%d", strings.TrimSuffix(v.endpoint, ":80"), v.OP2.Port)
  356. addr, err := net.ResolveUDPAddr("udp", host)
  357. if err != nil {
  358. fmt.Println("udpOpen resolve addr error: ", err)
  359. // TODO better logging
  360. return
  361. }
  362. v.UDPConn, err = net.DialUDP("udp", nil, addr)
  363. if err != nil {
  364. fmt.Println("udpOpen dial udp error: ", err)
  365. // TODO better logging
  366. return
  367. }
  368. // Create a 70 byte array and put the SSRC code from the Op 2 VoiceConnection event
  369. // into it. Then send that over the UDP connection to Discord
  370. sb := make([]byte, 70)
  371. binary.BigEndian.PutUint32(sb, v.OP2.SSRC)
  372. _, err = v.UDPConn.Write(sb)
  373. if err != nil {
  374. fmt.Println("udpOpen udp write error : ", err)
  375. // TODO better logging
  376. return
  377. }
  378. // Create a 70 byte array and listen for the initial handshake response
  379. // from Discord. Once we get it parse the IP and PORT information out
  380. // of the response. This should be our public IP and PORT as Discord
  381. // saw us.
  382. rb := make([]byte, 70)
  383. rlen, _, err := v.UDPConn.ReadFromUDP(rb)
  384. if err != nil {
  385. fmt.Println("udpOpen udp read error : ", err)
  386. // TODO better logging
  387. return
  388. }
  389. if rlen < 70 {
  390. fmt.Println("VoiceConnection RLEN should be 70 but isn't")
  391. }
  392. // Loop over position 4 though 20 to grab the IP address
  393. // Should never be beyond position 20.
  394. var ip string
  395. for i := 4; i < 20; i++ {
  396. if rb[i] == 0 {
  397. break
  398. }
  399. ip += string(rb[i])
  400. }
  401. // Grab port from position 68 and 69
  402. port := binary.LittleEndian.Uint16(rb[68:70])
  403. // Take the data from above and send it back to Discord to finalize
  404. // the UDP connection handshake.
  405. data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, "xsalsa20_poly1305"}}}
  406. err = v.wsConn.WriteJSON(data)
  407. if err != nil {
  408. fmt.Println("udpOpen write json error:", err)
  409. return
  410. }
  411. // start udpKeepAlive
  412. go v.udpKeepAlive(v.UDPConn, v.close, 5*time.Second)
  413. // TODO: find a way to check that it fired off okay
  414. return
  415. }
  416. // udpKeepAlive sends a udp packet to keep the udp connection open
  417. // This is still a bit of a "proof of concept"
  418. func (v *VoiceConnection) udpKeepAlive(UDPConn *net.UDPConn, close <-chan struct{}, i time.Duration) {
  419. if UDPConn == nil || close == nil {
  420. return
  421. }
  422. var err error
  423. var sequence uint64
  424. packet := make([]byte, 8)
  425. ticker := time.NewTicker(i)
  426. for {
  427. binary.LittleEndian.PutUint64(packet, sequence)
  428. sequence++
  429. _, err = UDPConn.Write(packet)
  430. if err != nil {
  431. fmt.Println("udpKeepAlive udp write error : ", err)
  432. return
  433. }
  434. select {
  435. case <-ticker.C:
  436. // continue loop and send keepalive
  437. case <-close:
  438. return
  439. }
  440. }
  441. }
  442. // opusSender will listen on the given channel and send any
  443. // pre-encoded opus audio to Discord. Supposedly.
  444. func (v *VoiceConnection) opusSender(UDPConn *net.UDPConn, close <-chan struct{}, opus <-chan []byte, rate, size int) {
  445. if UDPConn == nil || close == nil {
  446. return
  447. }
  448. runtime.LockOSThread()
  449. // VoiceConnection is now ready to receive audio packets
  450. // TODO: this needs reviewed as I think there must be a better way.
  451. v.Ready = true
  452. defer func() { v.Ready = false }()
  453. var sequence uint16
  454. var timestamp uint32
  455. var recvbuf []byte
  456. var ok bool
  457. udpHeader := make([]byte, 12)
  458. var nonce [24]byte
  459. // build the parts that don't change in the udpHeader
  460. udpHeader[0] = 0x80
  461. udpHeader[1] = 0x78
  462. binary.BigEndian.PutUint32(udpHeader[8:], v.OP2.SSRC)
  463. // start a send loop that loops until buf chan is closed
  464. ticker := time.NewTicker(time.Millisecond * time.Duration(size/(rate/1000)))
  465. for {
  466. // Get data from chan. If chan is closed, return.
  467. select {
  468. case <-close:
  469. return
  470. case recvbuf, ok = <-opus:
  471. if !ok {
  472. return
  473. }
  474. // else, continue loop
  475. }
  476. // Add sequence and timestamp to udpPacket
  477. binary.BigEndian.PutUint16(udpHeader[2:], sequence)
  478. binary.BigEndian.PutUint32(udpHeader[4:], timestamp)
  479. // encrypt the opus data
  480. copy(nonce[:], udpHeader)
  481. sendbuf := secretbox.Seal(udpHeader, recvbuf, &nonce, &v.op4.SecretKey)
  482. // block here until we're exactly at the right time :)
  483. // Then send rtp audio packet to Discord over UDP
  484. select {
  485. case <-close:
  486. return
  487. case <-ticker.C:
  488. // continue
  489. }
  490. _, err := UDPConn.Write(sendbuf)
  491. if err != nil {
  492. fmt.Println("error writing to udp connection: ", err)
  493. return
  494. }
  495. if (sequence) == 0xFFFF {
  496. sequence = 0
  497. } else {
  498. sequence++
  499. }
  500. if (timestamp + uint32(size)) >= 0xFFFFFFFF {
  501. timestamp = 0
  502. } else {
  503. timestamp += uint32(size)
  504. }
  505. }
  506. }
  // A Packet contains the headers and content of a received voice packet.
  type Packet struct {
  	// SSRC is read from header bytes 8-11.
  	SSRC uint32
  	// Sequence is read from header bytes 2-3.
  	Sequence uint16
  	// Timestamp is read from header bytes 4-7.
  	Timestamp uint32
  	// Type holds the first two header bytes.
  	Type []byte
  	// Opus is the decrypted payload.
  	Opus []byte
  	// PCM is not filled in by opusReceiver.
  	PCM []int16
  }
  516. // opusReceiver listens on the UDP socket for incoming packets
  517. // and sends them across the given channel
  518. // NOTE :: This function may change names later.
  519. func (v *VoiceConnection) opusReceiver(UDPConn *net.UDPConn, close <-chan struct{}, c chan *Packet) {
  520. if UDPConn == nil || close == nil {
  521. return
  522. }
  523. p := Packet{}
  524. recvbuf := make([]byte, 1024)
  525. var nonce [24]byte
  526. for {
  527. rlen, err := UDPConn.Read(recvbuf)
  528. if err != nil {
  529. fmt.Println("opusReceiver UDP Read error:", err)
  530. return
  531. }
  532. select {
  533. case <-close:
  534. return
  535. default:
  536. // continue loop
  537. }
  538. // For now, skip anything except audio.
  539. if rlen < 12 || recvbuf[0] != 0x80 {
  540. continue
  541. }
  542. // build a audio packet struct
  543. p.Type = recvbuf[0:2]
  544. p.Sequence = binary.BigEndian.Uint16(recvbuf[2:4])
  545. p.Timestamp = binary.BigEndian.Uint32(recvbuf[4:8])
  546. p.SSRC = binary.BigEndian.Uint32(recvbuf[8:12])
  547. // decrypt opus data
  548. copy(nonce[:], recvbuf[0:12])
  549. p.Opus, _ = secretbox.Open(nil, recvbuf[12:rlen], &nonce, &v.op4.SecretKey)
  550. if c != nil {
  551. c <- &p
  552. }
  553. }
  554. }