voice.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. // Discordgo - Discord bindings for Go
  2. // Available at https://github.com/bwmarrin/discordgo
  3. // Copyright 2015-2016 Bruce Marriner <bruce@sqls.net>. All rights reserved.
  4. // Use of this source code is governed by a BSD-style
  5. // license that can be found in the LICENSE file.
  6. // This file contains code related to Discord voice support
  7. package discordgo
  8. import (
  9. "encoding/binary"
  10. "encoding/json"
  11. "fmt"
  12. "net"
  13. "runtime"
  14. "strings"
  15. "sync"
  16. "time"
  17. "github.com/gorilla/websocket"
  18. "golang.org/x/crypto/nacl/secretbox"
  19. )
  20. // ------------------------------------------------------------------------------------------------
  21. // Code related to both VoiceConnection Websocket and UDP connections.
  22. // ------------------------------------------------------------------------------------------------
  23. // A VoiceConnectionConnection struct holds all the data and functions related to a Discord Voice Connection.
  24. type VoiceConnection struct {
  25. sync.Mutex
  26. Debug bool // If true, print extra logging
  27. Ready bool // If true, voice is ready to send/receive audio
  28. GuildID string
  29. ChannelID string
  30. UserID string
  31. OpusSend chan []byte // Chan for sending opus audio
  32. OpusRecv chan *Packet // Chan for receiving opus audio
  33. wsConn *websocket.Conn
  34. udpConn *net.UDPConn
  35. session *Session
  36. sessionID string
  37. token string
  38. endpoint string
  39. // Used to send a close signal to goroutines
  40. close chan struct{}
  41. // Used to allow blocking until connected
  42. connected chan bool
  43. // Used to pass the sessionid from onVoiceStateUpdate
  44. sessionRecv chan string
  45. op4 voiceOP4
  46. op2 voiceOP2
  47. }
  48. // Speaking sends a speaking notification to Discord over the voice websocket.
  49. // This must be sent as true prior to sending audio and should be set to false
  50. // once finished sending audio.
  51. // b : Send true if speaking, false if not.
  52. func (v *VoiceConnection) Speaking(b bool) (err error) {
  53. type voiceSpeakingData struct {
  54. Speaking bool `json:"speaking"`
  55. Delay int `json:"delay"`
  56. }
  57. type voiceSpeakingOp struct {
  58. Op int `json:"op"` // Always 5
  59. Data voiceSpeakingData `json:"d"`
  60. }
  61. if v.wsConn == nil {
  62. return fmt.Errorf("No VoiceConnection websocket.")
  63. }
  64. data := voiceSpeakingOp{5, voiceSpeakingData{b, 0}}
  65. err = v.wsConn.WriteJSON(data)
  66. if err != nil {
  67. fmt.Println("Speaking() write json error:", err)
  68. return
  69. }
  70. return
  71. }
  72. // ChangeChannel sends Discord a request to change channels within a Guild
  73. // !!! NOTE !!! This function may be removed in favour of just using ChannelVoiceJoin
  74. func (v *VoiceConnection) ChangeChannel(channelID string, mute, deaf bool) (err error) {
  75. data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, &channelID, mute, deaf}}
  76. err = v.session.wsConn.WriteJSON(data)
  77. return
  78. }
  79. // Disconnect disconnects from this voice channel and closes the websocket
  80. // and udp connections to Discord.
  81. // !!! NOTE !!! this function may be removed in favour of ChannelVoiceLeave
  82. func (v *VoiceConnection) Disconnect() (err error) {
  83. // Send a OP4 with a nil channel to disconnect
  84. if v.sessionID != "" {
  85. data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}}
  86. err = v.session.wsConn.WriteJSON(data)
  87. v.sessionID = ""
  88. }
  89. // Close websocket and udp connections
  90. v.Close()
  91. delete(v.session.VoiceConnections, v.GuildID)
  92. return
  93. }
  94. // Close closes the voice ws and udp connections
  95. func (v *VoiceConnection) Close() {
  96. v.Lock()
  97. defer v.Unlock()
  98. v.Ready = false
  99. if v.close != nil {
  100. close(v.close)
  101. v.close = nil
  102. }
  103. if v.udpConn != nil {
  104. err := v.udpConn.Close()
  105. if err != nil {
  106. fmt.Println("error closing udp connection: ", err)
  107. }
  108. v.udpConn = nil
  109. }
  110. if v.wsConn != nil {
  111. err := v.wsConn.Close()
  112. if err != nil {
  113. fmt.Println("error closing websocket connection: ", err)
  114. }
  115. v.wsConn = nil
  116. }
  117. }
  118. // ------------------------------------------------------------------------------------------------
  119. // Unexported Internal Functions Below.
  120. // ------------------------------------------------------------------------------------------------
  121. // A voiceOP4 stores the data for the voice operation 4 websocket event
  122. // which provides us with the NaCl SecretBox encryption key
  123. type voiceOP4 struct {
  124. SecretKey [32]byte `json:"secret_key"`
  125. Mode string `json:"mode"`
  126. }
  127. // A voiceOP2 stores the data for the voice operation 2 websocket event
  128. // which is sort of like the voice READY packet
  129. type voiceOP2 struct {
  130. SSRC uint32 `json:"ssrc"`
  131. Port int `json:"port"`
  132. Modes []string `json:"modes"`
  133. HeartbeatInterval time.Duration `json:"heartbeat_interval"`
  134. }
  135. // WaitUntilConnected waits for the Voice Connection to
  136. // become ready, if it does not become ready it retuns an err
  137. func (v *VoiceConnection) waitUntilConnected() error {
  138. i := 0
  139. for {
  140. if v.Ready {
  141. return nil
  142. }
  143. if i > 10 {
  144. return fmt.Errorf("Timeout waiting for voice.")
  145. }
  146. time.Sleep(1 * time.Second)
  147. i++
  148. }
  149. return nil
  150. }
  151. // Open opens a voice connection. This should be called
  152. // after VoiceChannelJoin is used and the data VOICE websocket events
  153. // are captured.
  154. func (v *VoiceConnection) open() (err error) {
  155. v.Lock()
  156. defer v.Unlock()
  157. // Don't open a websocket if one is already open
  158. if v.wsConn != nil {
  159. return
  160. }
  161. // TODO temp? loop to wait for the SessionID
  162. i := 0
  163. for {
  164. if v.sessionID != "" {
  165. break
  166. }
  167. if i > 20 { // only loop for up to 1 second total
  168. return fmt.Errorf("Did not receive voice Session ID in time.")
  169. }
  170. time.Sleep(50 * time.Millisecond)
  171. i++
  172. }
  173. // Connect to VoiceConnection Websocket
  174. vg := fmt.Sprintf("wss://%s", strings.TrimSuffix(v.endpoint, ":80"))
  175. v.wsConn, _, err = websocket.DefaultDialer.Dial(vg, nil)
  176. if err != nil {
  177. fmt.Println("VOICE error opening websocket:", err)
  178. return
  179. }
  180. type voiceHandshakeData struct {
  181. ServerID string `json:"server_id"`
  182. UserID string `json:"user_id"`
  183. SessionID string `json:"session_id"`
  184. Token string `json:"token"`
  185. }
  186. type voiceHandshakeOp struct {
  187. Op int `json:"op"` // Always 0
  188. Data voiceHandshakeData `json:"d"`
  189. }
  190. data := voiceHandshakeOp{0, voiceHandshakeData{v.GuildID, v.UserID, v.sessionID, v.token}}
  191. err = v.wsConn.WriteJSON(data)
  192. if err != nil {
  193. fmt.Println("VOICE error sending init packet:", err)
  194. return
  195. }
  196. // Start a listening for voice websocket events
  197. // TODO add a check here to make sure Listen worked by monitoring
  198. // a chan or bool?
  199. v.close = make(chan struct{})
  200. go v.wsListen(v.wsConn, v.close)
  201. return
  202. }
  203. // wsListen listens on the voice websocket for messages and passes them
  204. // to the voice event handler. This is automatically called by the Open func
  205. func (v *VoiceConnection) wsListen(wsConn *websocket.Conn, close <-chan struct{}) {
  206. for {
  207. messageType, message, err := v.wsConn.ReadMessage()
  208. if err != nil {
  209. // TODO: add reconnect, matching wsapi.go:listen()
  210. // TODO: Handle this problem better.
  211. // TODO: needs proper logging
  212. fmt.Println("VoiceConnection Listen Error:", err)
  213. return
  214. }
  215. // Pass received message to voice event handler
  216. select {
  217. case <-close:
  218. return
  219. default:
  220. go v.wsEvent(messageType, message)
  221. }
  222. }
  223. }
  224. // wsEvent handles any voice websocket events. This is only called by the
  225. // wsListen() function.
  226. func (v *VoiceConnection) wsEvent(messageType int, message []byte) {
  227. if v.Debug {
  228. fmt.Println("wsEvent received: ", messageType)
  229. printJSON(message)
  230. }
  231. var e Event
  232. if err := json.Unmarshal(message, &e); err != nil {
  233. fmt.Println("wsEvent Unmarshall error: ", err)
  234. return
  235. }
  236. switch e.Operation {
  237. case 2: // READY
  238. if err := json.Unmarshal(e.RawData, &v.op2); err != nil {
  239. fmt.Println("voiceWS.onEvent OP2 Unmarshall error: ", err)
  240. printJSON(e.RawData) // TODO: Better error logging
  241. return
  242. }
  243. // Start the voice websocket heartbeat to keep the connection alive
  244. go v.wsHeartbeat(v.wsConn, v.close, v.op2.HeartbeatInterval)
  245. // TODO monitor a chan/bool to verify this was successful
  246. // Start the UDP connection
  247. err := v.udpOpen()
  248. if err != nil {
  249. fmt.Println("Error opening udp connection: ", err)
  250. return
  251. }
  252. // Start the opusSender.
  253. // TODO: Should we allow 48000/960 values to be user defined?
  254. if v.OpusSend == nil {
  255. v.OpusSend = make(chan []byte, 2)
  256. }
  257. go v.opusSender(v.udpConn, v.close, v.OpusSend, 48000, 960)
  258. // Start the opusReceiver
  259. if v.OpusRecv == nil {
  260. v.OpusRecv = make(chan *Packet, 2)
  261. }
  262. go v.opusReceiver(v.udpConn, v.close, v.OpusRecv)
  263. // Send the ready event
  264. v.connected <- true
  265. return
  266. case 3: // HEARTBEAT response
  267. // add code to use this to track latency?
  268. return
  269. case 4: // udp encryption secret key
  270. v.op4 = voiceOP4{}
  271. if err := json.Unmarshal(e.RawData, &v.op4); err != nil {
  272. fmt.Println("voiceWS.onEvent OP4 Unmarshall error: ", err)
  273. printJSON(e.RawData)
  274. return
  275. }
  276. return
  277. case 5:
  278. // SPEAKING TRUE/FALSE NOTIFICATION
  279. /*
  280. {
  281. "user_id": "1238921738912",
  282. "ssrc": 2,
  283. "speaking": false
  284. }
  285. */
  286. default:
  287. fmt.Println("UNKNOWN VOICE OP: ", e.Operation)
  288. printJSON(e.RawData)
  289. }
  290. return
  291. }
  292. type voiceHeartbeatOp struct {
  293. Op int `json:"op"` // Always 3
  294. Data int `json:"d"`
  295. }
  296. // NOTE :: When a guild voice server changes how do we shut this down
  297. // properly, so a new connection can be setup without fuss?
  298. //
  299. // wsHeartbeat sends regular heartbeats to voice Discord so it knows the client
  300. // is still connected. If you do not send these heartbeats Discord will
  301. // disconnect the websocket connection after a few seconds.
  302. func (v *VoiceConnection) wsHeartbeat(wsConn *websocket.Conn, close <-chan struct{}, i time.Duration) {
  303. if close == nil || wsConn == nil {
  304. return
  305. }
  306. var err error
  307. ticker := time.NewTicker(i * time.Millisecond)
  308. for {
  309. err = wsConn.WriteJSON(voiceHeartbeatOp{3, int(time.Now().Unix())})
  310. if err != nil {
  311. fmt.Println("wsHeartbeat send error: ", err)
  312. return
  313. }
  314. select {
  315. case <-ticker.C:
  316. // continue loop and send heartbeat
  317. case <-close:
  318. return
  319. }
  320. }
  321. }
  322. // ------------------------------------------------------------------------------------------------
  323. // Code related to the VoiceConnection UDP connection
  324. // ------------------------------------------------------------------------------------------------
  325. type voiceUDPData struct {
  326. Address string `json:"address"` // Public IP of machine running this code
  327. Port uint16 `json:"port"` // UDP Port of machine running this code
  328. Mode string `json:"mode"` // always "xsalsa20_poly1305"
  329. }
  330. type voiceUDPD struct {
  331. Protocol string `json:"protocol"` // Always "udp" ?
  332. Data voiceUDPData `json:"data"`
  333. }
  334. type voiceUDPOp struct {
  335. Op int `json:"op"` // Always 1
  336. Data voiceUDPD `json:"d"`
  337. }
  338. // udpOpen opens a UDP connection to the voice server and completes the
  339. // initial required handshake. This connection is left open in the session
  340. // and can be used to send or receive audio. This should only be called
  341. // from voice.wsEvent OP2
  342. func (v *VoiceConnection) udpOpen() (err error) {
  343. v.Lock()
  344. defer v.Unlock()
  345. if v.wsConn == nil {
  346. return fmt.Errorf("nil voice websocket")
  347. }
  348. if v.udpConn != nil {
  349. return fmt.Errorf("udp connection already open")
  350. }
  351. if v.close == nil {
  352. return fmt.Errorf("nil close channel")
  353. }
  354. if v.endpoint == "" {
  355. return fmt.Errorf("empty endpoint")
  356. }
  357. host := fmt.Sprintf("%s:%d", strings.TrimSuffix(v.endpoint, ":80"), v.op2.Port)
  358. addr, err := net.ResolveUDPAddr("udp", host)
  359. if err != nil {
  360. fmt.Println("udpOpen resolve addr error: ", err)
  361. // TODO better logging
  362. return
  363. }
  364. v.udpConn, err = net.DialUDP("udp", nil, addr)
  365. if err != nil {
  366. fmt.Println("udpOpen dial udp error: ", err)
  367. // TODO better logging
  368. return
  369. }
  370. // Create a 70 byte array and put the SSRC code from the Op 2 VoiceConnection event
  371. // into it. Then send that over the UDP connection to Discord
  372. sb := make([]byte, 70)
  373. binary.BigEndian.PutUint32(sb, v.op2.SSRC)
  374. _, err = v.udpConn.Write(sb)
  375. if err != nil {
  376. fmt.Println("udpOpen udp write error : ", err)
  377. // TODO better logging
  378. return
  379. }
  380. // Create a 70 byte array and listen for the initial handshake response
  381. // from Discord. Once we get it parse the IP and PORT information out
  382. // of the response. This should be our public IP and PORT as Discord
  383. // saw us.
  384. rb := make([]byte, 70)
  385. rlen, _, err := v.udpConn.ReadFromUDP(rb)
  386. if err != nil {
  387. fmt.Println("udpOpen udp read error : ", err)
  388. // TODO better logging
  389. return
  390. }
  391. if rlen < 70 {
  392. fmt.Println("VoiceConnection RLEN should be 70 but isn't")
  393. }
  394. // Loop over position 4 though 20 to grab the IP address
  395. // Should never be beyond position 20.
  396. var ip string
  397. for i := 4; i < 20; i++ {
  398. if rb[i] == 0 {
  399. break
  400. }
  401. ip += string(rb[i])
  402. }
  403. // Grab port from position 68 and 69
  404. port := binary.LittleEndian.Uint16(rb[68:70])
  405. // Take the data from above and send it back to Discord to finalize
  406. // the UDP connection handshake.
  407. data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, "xsalsa20_poly1305"}}}
  408. err = v.wsConn.WriteJSON(data)
  409. if err != nil {
  410. fmt.Println("udpOpen write json error:", err)
  411. return
  412. }
  413. // start udpKeepAlive
  414. go v.udpKeepAlive(v.udpConn, v.close, 5*time.Second)
  415. // TODO: find a way to check that it fired off okay
  416. return
  417. }
  418. // udpKeepAlive sends a udp packet to keep the udp connection open
  419. // This is still a bit of a "proof of concept"
  420. func (v *VoiceConnection) udpKeepAlive(udpConn *net.UDPConn, close <-chan struct{}, i time.Duration) {
  421. if udpConn == nil || close == nil {
  422. return
  423. }
  424. var err error
  425. var sequence uint64
  426. packet := make([]byte, 8)
  427. ticker := time.NewTicker(i)
  428. for {
  429. binary.LittleEndian.PutUint64(packet, sequence)
  430. sequence++
  431. _, err = udpConn.Write(packet)
  432. if err != nil {
  433. fmt.Println("udpKeepAlive udp write error : ", err)
  434. return
  435. }
  436. select {
  437. case <-ticker.C:
  438. // continue loop and send keepalive
  439. case <-close:
  440. return
  441. }
  442. }
  443. }
  444. // opusSender will listen on the given channel and send any
  445. // pre-encoded opus audio to Discord. Supposedly.
  446. func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{}, opus <-chan []byte, rate, size int) {
  447. if udpConn == nil || close == nil {
  448. return
  449. }
  450. runtime.LockOSThread()
  451. // VoiceConnection is now ready to receive audio packets
  452. // TODO: this needs reviewed as I think there must be a better way.
  453. v.Ready = true
  454. defer func() { v.Ready = false }()
  455. var sequence uint16
  456. var timestamp uint32
  457. var recvbuf []byte
  458. var ok bool
  459. udpHeader := make([]byte, 12)
  460. var nonce [24]byte
  461. // build the parts that don't change in the udpHeader
  462. udpHeader[0] = 0x80
  463. udpHeader[1] = 0x78
  464. binary.BigEndian.PutUint32(udpHeader[8:], v.op2.SSRC)
  465. // start a send loop that loops until buf chan is closed
  466. ticker := time.NewTicker(time.Millisecond * time.Duration(size/(rate/1000)))
  467. for {
  468. // Get data from chan. If chan is closed, return.
  469. select {
  470. case <-close:
  471. return
  472. case recvbuf, ok = <-opus:
  473. if !ok {
  474. return
  475. }
  476. // else, continue loop
  477. }
  478. // Add sequence and timestamp to udpPacket
  479. binary.BigEndian.PutUint16(udpHeader[2:], sequence)
  480. binary.BigEndian.PutUint32(udpHeader[4:], timestamp)
  481. // encrypt the opus data
  482. copy(nonce[:], udpHeader)
  483. sendbuf := secretbox.Seal(udpHeader, recvbuf, &nonce, &v.op4.SecretKey)
  484. // block here until we're exactly at the right time :)
  485. // Then send rtp audio packet to Discord over UDP
  486. select {
  487. case <-close:
  488. return
  489. case <-ticker.C:
  490. // continue
  491. }
  492. _, err := udpConn.Write(sendbuf)
  493. if err != nil {
  494. fmt.Println("error writing to udp connection: ", err)
  495. return
  496. }
  497. if (sequence) == 0xFFFF {
  498. sequence = 0
  499. } else {
  500. sequence++
  501. }
  502. if (timestamp + uint32(size)) >= 0xFFFFFFFF {
  503. timestamp = 0
  504. } else {
  505. timestamp += uint32(size)
  506. }
  507. }
  508. }
  509. // A Packet contains the headers and content of a received voice packet.
  510. type Packet struct {
  511. SSRC uint32
  512. Sequence uint16
  513. Timestamp uint32
  514. Type []byte
  515. Opus []byte
  516. PCM []int16
  517. }
  518. // opusReceiver listens on the UDP socket for incoming packets
  519. // and sends them across the given channel
  520. // NOTE :: This function may change names later.
  521. func (v *VoiceConnection) opusReceiver(udpConn *net.UDPConn, close <-chan struct{}, c chan *Packet) {
  522. if udpConn == nil || close == nil {
  523. return
  524. }
  525. p := Packet{}
  526. recvbuf := make([]byte, 1024)
  527. var nonce [24]byte
  528. for {
  529. rlen, err := udpConn.Read(recvbuf)
  530. if err != nil {
  531. fmt.Println("opusReceiver UDP Read error:", err)
  532. return
  533. }
  534. select {
  535. case <-close:
  536. return
  537. default:
  538. // continue loop
  539. }
  540. // For now, skip anything except audio.
  541. if rlen < 12 || recvbuf[0] != 0x80 {
  542. continue
  543. }
  544. // build a audio packet struct
  545. p.Type = recvbuf[0:2]
  546. p.Sequence = binary.BigEndian.Uint16(recvbuf[2:4])
  547. p.Timestamp = binary.BigEndian.Uint32(recvbuf[4:8])
  548. p.SSRC = binary.BigEndian.Uint32(recvbuf[8:12])
  549. // decrypt opus data
  550. copy(nonce[:], recvbuf[0:12])
  551. p.Opus, _ = secretbox.Open(nil, recvbuf[12:rlen], &nonce, &v.op4.SecretKey)
  552. if c != nil {
  553. c <- &p
  554. }
  555. }
  556. }