voice.go 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710
  1. // Discordgo - Discord bindings for Go
  2. // Available at https://github.com/bwmarrin/discordgo
  3. // Copyright 2015-2016 Bruce Marriner <bruce@sqls.net>. All rights reserved.
  4. // Use of this source code is governed by a BSD-style
  5. // license that can be found in the LICENSE file.
  6. // This file contains code related to Discord voice support
  7. package discordgo
  8. import (
  9. "encoding/binary"
  10. "encoding/json"
  11. "fmt"
  12. "log"
  13. "net"
  14. "runtime"
  15. "strings"
  16. "sync"
  17. "time"
  18. "github.com/gorilla/websocket"
  19. "golang.org/x/crypto/nacl/secretbox"
  20. )
  21. // ------------------------------------------------------------------------------------------------
  22. // Code related to both VoiceConnection Websocket and UDP connections.
  23. // ------------------------------------------------------------------------------------------------
  24. // A VoiceConnection struct holds all the data and functions related to a Discord Voice Connection.
  25. type VoiceConnection struct {
  26. sync.Mutex
  27. Debug bool // If true, print extra logging
  28. Ready bool // If true, voice is ready to send/receive audio
  29. GuildID string
  30. ChannelID string
  31. UserID string
  32. OpusSend chan []byte // Chan for sending opus audio
  33. OpusRecv chan *Packet // Chan for receiving opus audio
  34. wsConn *websocket.Conn
  35. udpConn *net.UDPConn
  36. session *Session
  37. sessionID string
  38. token string
  39. endpoint string
  40. // Used to send a close signal to goroutines
  41. close chan struct{}
  42. // Used to allow blocking until connected
  43. connected chan bool
  44. // Used to pass the sessionid from onVoiceStateUpdate
  45. sessionRecv chan string
  46. op4 voiceOP4
  47. op2 voiceOP2
  48. voiceSpeakingUpdateHandlers []VoiceSpeakingUpdateHandler
  49. }
  50. // VoiceSpeakingUpdateHandler type provides a function defination for the
  51. // VoiceSpeakingUpdate event
  52. type VoiceSpeakingUpdateHandler func(vc *VoiceConnection, vs *VoiceSpeakingUpdate)
  53. // Speaking sends a speaking notification to Discord over the voice websocket.
  54. // This must be sent as true prior to sending audio and should be set to false
  55. // once finished sending audio.
  56. // b : Send true if speaking, false if not.
  57. func (v *VoiceConnection) Speaking(b bool) (err error) {
  58. type voiceSpeakingData struct {
  59. Speaking bool `json:"speaking"`
  60. Delay int `json:"delay"`
  61. }
  62. type voiceSpeakingOp struct {
  63. Op int `json:"op"` // Always 5
  64. Data voiceSpeakingData `json:"d"`
  65. }
  66. if v.wsConn == nil {
  67. return fmt.Errorf("No VoiceConnection websocket.")
  68. }
  69. data := voiceSpeakingOp{5, voiceSpeakingData{b, 0}}
  70. err = v.wsConn.WriteJSON(data)
  71. if err != nil {
  72. log.Println("Speaking() write json error:", err)
  73. return
  74. }
  75. return
  76. }
  77. // ChangeChannel sends Discord a request to change channels within a Guild
  78. // !!! NOTE !!! This function may be removed in favour of just using ChannelVoiceJoin
  79. func (v *VoiceConnection) ChangeChannel(channelID string, mute, deaf bool) (err error) {
  80. data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, &channelID, mute, deaf}}
  81. err = v.session.wsConn.WriteJSON(data)
  82. return
  83. }
  84. // Disconnect disconnects from this voice channel and closes the websocket
  85. // and udp connections to Discord.
  86. // !!! NOTE !!! this function may be removed in favour of ChannelVoiceLeave
  87. func (v *VoiceConnection) Disconnect() (err error) {
  88. // Send a OP4 with a nil channel to disconnect
  89. if v.sessionID != "" {
  90. data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}}
  91. err = v.session.wsConn.WriteJSON(data)
  92. v.sessionID = ""
  93. }
  94. // Close websocket and udp connections
  95. v.Close()
  96. delete(v.session.VoiceConnections, v.GuildID)
  97. return
  98. }
  99. // Close closes the voice ws and udp connections
  100. func (v *VoiceConnection) Close() {
  101. v.Lock()
  102. defer v.Unlock()
  103. v.Ready = false
  104. if v.close != nil {
  105. close(v.close)
  106. v.close = nil
  107. }
  108. if v.udpConn != nil {
  109. err := v.udpConn.Close()
  110. if err != nil {
  111. log.Println("error closing udp connection: ", err)
  112. }
  113. v.udpConn = nil
  114. }
  115. if v.wsConn != nil {
  116. err := v.wsConn.Close()
  117. if err != nil {
  118. log.Println("error closing websocket connection: ", err)
  119. }
  120. v.wsConn = nil
  121. }
  122. }
  123. // AddHandler adds a Handler for VoiceSpeakingUpdate events.
  124. func (v *VoiceConnection) AddHandler(h VoiceSpeakingUpdateHandler) {
  125. v.Lock()
  126. defer v.Unlock()
  127. v.voiceSpeakingUpdateHandlers = append(v.voiceSpeakingUpdateHandlers, h)
  128. }
  129. // VoiceSpeakingUpdate is a struct for a VoiceSpeakingUpdate event.
  130. type VoiceSpeakingUpdate struct {
  131. UserID string `json:"user_id"`
  132. SSRC int `json:"ssrc"`
  133. Speaking bool `json:"speaking"`
  134. }
  135. // ------------------------------------------------------------------------------------------------
  136. // Unexported Internal Functions Below.
  137. // ------------------------------------------------------------------------------------------------
  138. // A voiceOP4 stores the data for the voice operation 4 websocket event
  139. // which provides us with the NaCl SecretBox encryption key
  140. type voiceOP4 struct {
  141. SecretKey [32]byte `json:"secret_key"`
  142. Mode string `json:"mode"`
  143. }
  144. // A voiceOP2 stores the data for the voice operation 2 websocket event
  145. // which is sort of like the voice READY packet
  146. type voiceOP2 struct {
  147. SSRC uint32 `json:"ssrc"`
  148. Port int `json:"port"`
  149. Modes []string `json:"modes"`
  150. HeartbeatInterval time.Duration `json:"heartbeat_interval"`
  151. }
  152. // WaitUntilConnected waits for the Voice Connection to
  153. // become ready, if it does not become ready it retuns an err
  154. func (v *VoiceConnection) waitUntilConnected() error {
  155. i := 0
  156. for {
  157. if v.Ready {
  158. return nil
  159. }
  160. if i > 10 {
  161. return fmt.Errorf("Timeout waiting for voice.")
  162. }
  163. time.Sleep(1 * time.Second)
  164. i++
  165. }
  166. }
  167. // Open opens a voice connection. This should be called
  168. // after VoiceChannelJoin is used and the data VOICE websocket events
  169. // are captured.
  170. func (v *VoiceConnection) open() (err error) {
  171. v.Lock()
  172. defer v.Unlock()
  173. // Don't open a websocket if one is already open
  174. if v.wsConn != nil {
  175. return
  176. }
  177. // TODO temp? loop to wait for the SessionID
  178. i := 0
  179. for {
  180. if v.sessionID != "" {
  181. break
  182. }
  183. if i > 20 { // only loop for up to 1 second total
  184. return fmt.Errorf("Did not receive voice Session ID in time.")
  185. }
  186. time.Sleep(50 * time.Millisecond)
  187. i++
  188. }
  189. // Connect to VoiceConnection Websocket
  190. vg := fmt.Sprintf("wss://%s", strings.TrimSuffix(v.endpoint, ":80"))
  191. v.wsConn, _, err = websocket.DefaultDialer.Dial(vg, nil)
  192. if err != nil {
  193. log.Println("VOICE error opening websocket:", err)
  194. return
  195. }
  196. type voiceHandshakeData struct {
  197. ServerID string `json:"server_id"`
  198. UserID string `json:"user_id"`
  199. SessionID string `json:"session_id"`
  200. Token string `json:"token"`
  201. }
  202. type voiceHandshakeOp struct {
  203. Op int `json:"op"` // Always 0
  204. Data voiceHandshakeData `json:"d"`
  205. }
  206. data := voiceHandshakeOp{0, voiceHandshakeData{v.GuildID, v.UserID, v.sessionID, v.token}}
  207. err = v.wsConn.WriteJSON(data)
  208. if err != nil {
  209. log.Println("VOICE error sending init packet:", err)
  210. return
  211. }
  212. // Start a listening for voice websocket events
  213. // TODO add a check here to make sure Listen worked by monitoring
  214. // a chan or bool?
  215. v.close = make(chan struct{})
  216. go v.wsListen(v.wsConn, v.close)
  217. return
  218. }
  219. // wsListen listens on the voice websocket for messages and passes them
  220. // to the voice event handler. This is automatically called by the Open func
  221. func (v *VoiceConnection) wsListen(wsConn *websocket.Conn, close <-chan struct{}) {
  222. for {
  223. messageType, message, err := v.wsConn.ReadMessage()
  224. if err != nil {
  225. // TODO: add reconnect, matching wsapi.go:listen()
  226. // TODO: Handle this problem better.
  227. // TODO: needs proper logging
  228. log.Println("VoiceConnection Listen Error:", err)
  229. return
  230. }
  231. // Pass received message to voice event handler
  232. select {
  233. case <-close:
  234. return
  235. default:
  236. go v.wsEvent(messageType, message)
  237. }
  238. }
  239. }
  240. // wsEvent handles any voice websocket events. This is only called by the
  241. // wsListen() function.
  242. func (v *VoiceConnection) wsEvent(messageType int, message []byte) {
  243. if v.Debug {
  244. log.Println("wsEvent received: ", messageType)
  245. printJSON(message)
  246. }
  247. var e Event
  248. if err := json.Unmarshal(message, &e); err != nil {
  249. log.Println("wsEvent Unmarshall error: ", err)
  250. return
  251. }
  252. switch e.Operation {
  253. case 2: // READY
  254. if err := json.Unmarshal(e.RawData, &v.op2); err != nil {
  255. log.Println("voiceWS.onEvent OP2 Unmarshall error: ", err)
  256. printJSON(e.RawData) // TODO: Better error logging
  257. return
  258. }
  259. // Start the voice websocket heartbeat to keep the connection alive
  260. go v.wsHeartbeat(v.wsConn, v.close, v.op2.HeartbeatInterval)
  261. // TODO monitor a chan/bool to verify this was successful
  262. // Start the UDP connection
  263. err := v.udpOpen()
  264. if err != nil {
  265. log.Println("Error opening udp connection: ", err)
  266. return
  267. }
  268. // Start the opusSender.
  269. // TODO: Should we allow 48000/960 values to be user defined?
  270. if v.OpusSend == nil {
  271. v.OpusSend = make(chan []byte, 2)
  272. }
  273. go v.opusSender(v.udpConn, v.close, v.OpusSend, 48000, 960)
  274. // Start the opusReceiver
  275. if v.OpusRecv == nil {
  276. v.OpusRecv = make(chan *Packet, 2)
  277. }
  278. go v.opusReceiver(v.udpConn, v.close, v.OpusRecv)
  279. // Send the ready event
  280. v.connected <- true
  281. return
  282. case 3: // HEARTBEAT response
  283. // add code to use this to track latency?
  284. return
  285. case 4: // udp encryption secret key
  286. v.op4 = voiceOP4{}
  287. if err := json.Unmarshal(e.RawData, &v.op4); err != nil {
  288. log.Println("voiceWS.onEvent OP4 Unmarshall error: ", err)
  289. printJSON(e.RawData)
  290. return
  291. }
  292. return
  293. case 5:
  294. if len(v.voiceSpeakingUpdateHandlers) == 0 {
  295. return
  296. }
  297. voiceSpeakingUpdate := &VoiceSpeakingUpdate{}
  298. if err := json.Unmarshal(e.RawData, voiceSpeakingUpdate); err != nil {
  299. log.Println("voiceWS.onEvent VoiceSpeakingUpdate Unmarshal error: ", err)
  300. printJSON(e.RawData)
  301. return
  302. }
  303. for _, h := range v.voiceSpeakingUpdateHandlers {
  304. h(v, voiceSpeakingUpdate)
  305. }
  306. default:
  307. log.Println("UNKNOWN VOICE OP: ", e.Operation)
  308. printJSON(e.RawData)
  309. }
  310. return
  311. }
  312. type voiceHeartbeatOp struct {
  313. Op int `json:"op"` // Always 3
  314. Data int `json:"d"`
  315. }
  316. // NOTE :: When a guild voice server changes how do we shut this down
  317. // properly, so a new connection can be setup without fuss?
  318. //
  319. // wsHeartbeat sends regular heartbeats to voice Discord so it knows the client
  320. // is still connected. If you do not send these heartbeats Discord will
  321. // disconnect the websocket connection after a few seconds.
  322. func (v *VoiceConnection) wsHeartbeat(wsConn *websocket.Conn, close <-chan struct{}, i time.Duration) {
  323. if close == nil || wsConn == nil {
  324. return
  325. }
  326. var err error
  327. ticker := time.NewTicker(i * time.Millisecond)
  328. for {
  329. err = wsConn.WriteJSON(voiceHeartbeatOp{3, int(time.Now().Unix())})
  330. if err != nil {
  331. log.Println("wsHeartbeat send error: ", err)
  332. return
  333. }
  334. select {
  335. case <-ticker.C:
  336. // continue loop and send heartbeat
  337. case <-close:
  338. return
  339. }
  340. }
  341. }
  342. // ------------------------------------------------------------------------------------------------
  343. // Code related to the VoiceConnection UDP connection
  344. // ------------------------------------------------------------------------------------------------
  345. type voiceUDPData struct {
  346. Address string `json:"address"` // Public IP of machine running this code
  347. Port uint16 `json:"port"` // UDP Port of machine running this code
  348. Mode string `json:"mode"` // always "xsalsa20_poly1305"
  349. }
  350. type voiceUDPD struct {
  351. Protocol string `json:"protocol"` // Always "udp" ?
  352. Data voiceUDPData `json:"data"`
  353. }
  354. type voiceUDPOp struct {
  355. Op int `json:"op"` // Always 1
  356. Data voiceUDPD `json:"d"`
  357. }
  358. // udpOpen opens a UDP connection to the voice server and completes the
  359. // initial required handshake. This connection is left open in the session
  360. // and can be used to send or receive audio. This should only be called
  361. // from voice.wsEvent OP2
  362. func (v *VoiceConnection) udpOpen() (err error) {
  363. v.Lock()
  364. defer v.Unlock()
  365. if v.wsConn == nil {
  366. return fmt.Errorf("nil voice websocket")
  367. }
  368. if v.udpConn != nil {
  369. return fmt.Errorf("udp connection already open")
  370. }
  371. if v.close == nil {
  372. return fmt.Errorf("nil close channel")
  373. }
  374. if v.endpoint == "" {
  375. return fmt.Errorf("empty endpoint")
  376. }
  377. host := fmt.Sprintf("%s:%d", strings.TrimSuffix(v.endpoint, ":80"), v.op2.Port)
  378. addr, err := net.ResolveUDPAddr("udp", host)
  379. if err != nil {
  380. log.Println("udpOpen resolve addr error: ", err)
  381. // TODO better logging
  382. return
  383. }
  384. v.udpConn, err = net.DialUDP("udp", nil, addr)
  385. if err != nil {
  386. log.Println("udpOpen dial udp error: ", err)
  387. // TODO better logging
  388. return
  389. }
  390. // Create a 70 byte array and put the SSRC code from the Op 2 VoiceConnection event
  391. // into it. Then send that over the UDP connection to Discord
  392. sb := make([]byte, 70)
  393. binary.BigEndian.PutUint32(sb, v.op2.SSRC)
  394. _, err = v.udpConn.Write(sb)
  395. if err != nil {
  396. log.Println("udpOpen udp write error : ", err)
  397. // TODO better logging
  398. return
  399. }
  400. // Create a 70 byte array and listen for the initial handshake response
  401. // from Discord. Once we get it parse the IP and PORT information out
  402. // of the response. This should be our public IP and PORT as Discord
  403. // saw us.
  404. rb := make([]byte, 70)
  405. rlen, _, err := v.udpConn.ReadFromUDP(rb)
  406. if err != nil {
  407. log.Println("udpOpen udp read error : ", err)
  408. // TODO better logging
  409. return
  410. }
  411. if rlen < 70 {
  412. log.Println("VoiceConnection RLEN should be 70 but isn't")
  413. }
  414. // Loop over position 4 though 20 to grab the IP address
  415. // Should never be beyond position 20.
  416. var ip string
  417. for i := 4; i < 20; i++ {
  418. if rb[i] == 0 {
  419. break
  420. }
  421. ip += string(rb[i])
  422. }
  423. // Grab port from position 68 and 69
  424. port := binary.LittleEndian.Uint16(rb[68:70])
  425. // Take the data from above and send it back to Discord to finalize
  426. // the UDP connection handshake.
  427. data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, "xsalsa20_poly1305"}}}
  428. err = v.wsConn.WriteJSON(data)
  429. if err != nil {
  430. log.Println("udpOpen write json error:", err)
  431. return
  432. }
  433. // start udpKeepAlive
  434. go v.udpKeepAlive(v.udpConn, v.close, 5*time.Second)
  435. // TODO: find a way to check that it fired off okay
  436. return
  437. }
  438. // udpKeepAlive sends a udp packet to keep the udp connection open
  439. // This is still a bit of a "proof of concept"
  440. func (v *VoiceConnection) udpKeepAlive(udpConn *net.UDPConn, close <-chan struct{}, i time.Duration) {
  441. if udpConn == nil || close == nil {
  442. return
  443. }
  444. var err error
  445. var sequence uint64
  446. packet := make([]byte, 8)
  447. ticker := time.NewTicker(i)
  448. for {
  449. binary.LittleEndian.PutUint64(packet, sequence)
  450. sequence++
  451. _, err = udpConn.Write(packet)
  452. if err != nil {
  453. log.Println("udpKeepAlive udp write error : ", err)
  454. return
  455. }
  456. select {
  457. case <-ticker.C:
  458. // continue loop and send keepalive
  459. case <-close:
  460. return
  461. }
  462. }
  463. }
  464. // opusSender will listen on the given channel and send any
  465. // pre-encoded opus audio to Discord. Supposedly.
  466. func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{}, opus <-chan []byte, rate, size int) {
  467. if udpConn == nil || close == nil {
  468. return
  469. }
  470. runtime.LockOSThread()
  471. // VoiceConnection is now ready to receive audio packets
  472. // TODO: this needs reviewed as I think there must be a better way.
  473. v.Ready = true
  474. defer func() { v.Ready = false }()
  475. var sequence uint16
  476. var timestamp uint32
  477. var recvbuf []byte
  478. var ok bool
  479. udpHeader := make([]byte, 12)
  480. var nonce [24]byte
  481. // build the parts that don't change in the udpHeader
  482. udpHeader[0] = 0x80
  483. udpHeader[1] = 0x78
  484. binary.BigEndian.PutUint32(udpHeader[8:], v.op2.SSRC)
  485. // start a send loop that loops until buf chan is closed
  486. ticker := time.NewTicker(time.Millisecond * time.Duration(size/(rate/1000)))
  487. for {
  488. // Get data from chan. If chan is closed, return.
  489. select {
  490. case <-close:
  491. return
  492. case recvbuf, ok = <-opus:
  493. if !ok {
  494. return
  495. }
  496. // else, continue loop
  497. }
  498. // Add sequence and timestamp to udpPacket
  499. binary.BigEndian.PutUint16(udpHeader[2:], sequence)
  500. binary.BigEndian.PutUint32(udpHeader[4:], timestamp)
  501. // encrypt the opus data
  502. copy(nonce[:], udpHeader)
  503. sendbuf := secretbox.Seal(udpHeader, recvbuf, &nonce, &v.op4.SecretKey)
  504. // block here until we're exactly at the right time :)
  505. // Then send rtp audio packet to Discord over UDP
  506. select {
  507. case <-close:
  508. return
  509. case <-ticker.C:
  510. // continue
  511. }
  512. _, err := udpConn.Write(sendbuf)
  513. if err != nil {
  514. log.Println("error writing to udp connection: ", err)
  515. return
  516. }
  517. if (sequence) == 0xFFFF {
  518. sequence = 0
  519. } else {
  520. sequence++
  521. }
  522. if (timestamp + uint32(size)) >= 0xFFFFFFFF {
  523. timestamp = 0
  524. } else {
  525. timestamp += uint32(size)
  526. }
  527. }
  528. }
  529. // A Packet contains the headers and content of a received voice packet.
  530. type Packet struct {
  531. SSRC uint32
  532. Sequence uint16
  533. Timestamp uint32
  534. Type []byte
  535. Opus []byte
  536. PCM []int16
  537. }
  538. // opusReceiver listens on the UDP socket for incoming packets
  539. // and sends them across the given channel
  540. // NOTE :: This function may change names later.
  541. func (v *VoiceConnection) opusReceiver(udpConn *net.UDPConn, close <-chan struct{}, c chan *Packet) {
  542. if udpConn == nil || close == nil {
  543. return
  544. }
  545. p := Packet{}
  546. recvbuf := make([]byte, 1024)
  547. var nonce [24]byte
  548. for {
  549. rlen, err := udpConn.Read(recvbuf)
  550. if err != nil {
  551. log.Println("opusReceiver UDP Read error:", err)
  552. return
  553. }
  554. select {
  555. case <-close:
  556. return
  557. default:
  558. // continue loop
  559. }
  560. // For now, skip anything except audio.
  561. if rlen < 12 || recvbuf[0] != 0x80 {
  562. continue
  563. }
  564. // build a audio packet struct
  565. p.Type = recvbuf[0:2]
  566. p.Sequence = binary.BigEndian.Uint16(recvbuf[2:4])
  567. p.Timestamp = binary.BigEndian.Uint32(recvbuf[4:8])
  568. p.SSRC = binary.BigEndian.Uint32(recvbuf[8:12])
  569. // decrypt opus data
  570. copy(nonce[:], recvbuf[0:12])
  571. p.Opus, _ = secretbox.Open(nil, recvbuf[12:rlen], &nonce, &v.op4.SecretKey)
  572. if c != nil {
  573. c <- &p
  574. }
  575. }
  576. }