voice.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. // Discordgo - Discord bindings for Go
  2. // Available at https://github.com/bwmarrin/discordgo
  3. // Copyright 2015-2016 Bruce Marriner <bruce@sqls.net>. All rights reserved.
  4. // Use of this source code is governed by a BSD-style
  5. // license that can be found in the LICENSE file.
  6. // This file contains code related to Discord voice support
  7. package discordgo
  8. import (
  9. "encoding/binary"
  10. "encoding/json"
  11. "fmt"
  12. "net"
  13. "runtime"
  14. "strings"
  15. "sync"
  16. "time"
  17. "github.com/gorilla/websocket"
  18. "golang.org/x/crypto/nacl/secretbox"
  19. )
  20. // ------------------------------------------------------------------------------------------------
  21. // Code related to both Voice Websocket and UDP connections.
  22. // ------------------------------------------------------------------------------------------------
// A Voice struct holds all data and functions related to Discord Voice support.
//
// The embedded mutex is held by Open, udpOpen and Close while they read or
// replace the connection fields. The goroutines started for a connection
// (listener, heartbeat, keepalive, opus sender and receiver) are told to stop
// by closing the close channel.
type Voice struct {
	sync.Mutex // locked by Open, udpOpen and Close

	Ready bool // If true, voice is ready to send/receive audio
	Debug bool // If true, print extra logging

	OP2 *voiceOP2 // exported for dgvoice, may change.

	OpusSend chan []byte  // Chan for sending opus audio
	OpusRecv chan *Packet // Chan for receiving opus audio
	// FrameRate int // This can be used to set the FrameRate of Opus data
	// FrameSize int // This can be used to set the FrameSize of Opus data

	wsConn  *websocket.Conn // voice websocket; set by Open, cleared by Close
	UDPConn *net.UDPConn    // this will become unexported soon.

	// sessionID, token, guildID and userID are sent in the op 0 handshake
	// by Open; endpoint is the host used for both the websocket and the UDP
	// connection.
	sessionID string
	token     string
	endpoint  string
	guildID   string
	channelID string
	userID    string

	// op4 holds the secret key received in voice op 4; it is used to
	// encrypt and decrypt audio packets.
	op4 voiceOP4

	// Used to send a close signal to goroutines
	close chan struct{}
}
  45. // ------------------------------------------------------------------------------------------------
  46. // Code related to the Voice websocket connection
  47. // ------------------------------------------------------------------------------------------------
// A voiceOP4 stores the data for the voice operation 4 websocket event
// which provides us with the NaCl SecretBox encryption key
type voiceOP4 struct {
	SecretKey [32]byte `json:"secret_key"` // key passed to secretbox.Seal and secretbox.Open
	Mode      string   `json:"mode"`
}
// A voiceOP2 stores the data for the voice operation 2 websocket event
// which is sort of like the voice READY packet
type voiceOP2 struct {
	SSRC  uint32   `json:"ssrc"`  // written into the UDP handshake packet and every RTP header
	Port  int      `json:"port"`  // UDP port on the voice endpoint that udpOpen dials
	Modes []string `json:"modes"`
	// HeartbeatInterval is a plain count of milliseconds: wsHeartbeat
	// multiplies it by time.Millisecond before using it.
	HeartbeatInterval time.Duration `json:"heartbeat_interval"`
}
// A voiceHandshakeData is the payload of the op 0 handshake that Open sends
// on the voice websocket. Open fills it positionally, so the field order
// must not change.
type voiceHandshakeData struct {
	ServerID  string `json:"server_id"` // Open passes the guild ID here
	UserID    string `json:"user_id"`
	SessionID string `json:"session_id"`
	Token     string `json:"token"`
}
// A voiceHandshakeOp is the envelope for the voice websocket handshake
// message sent by Open.
type voiceHandshakeOp struct {
	Op   int                `json:"op"` // Always 0
	Data voiceHandshakeData `json:"d"`
}
  72. // Open opens a voice connection. This should be called
  73. // after VoiceChannelJoin is used and the data VOICE websocket events
  74. // are captured.
  75. func (v *Voice) Open() (err error) {
  76. v.Lock()
  77. defer v.Unlock()
  78. // Don't open a websocket if one is already open
  79. if v.wsConn != nil {
  80. return
  81. }
  82. // Connect to Voice Websocket
  83. vg := fmt.Sprintf("wss://%s", strings.TrimSuffix(v.endpoint, ":80"))
  84. v.wsConn, _, err = websocket.DefaultDialer.Dial(vg, nil)
  85. if err != nil {
  86. fmt.Println("VOICE error opening websocket:", err)
  87. return
  88. }
  89. data := voiceHandshakeOp{0, voiceHandshakeData{v.guildID, v.userID, v.sessionID, v.token}}
  90. err = v.wsConn.WriteJSON(data)
  91. if err != nil {
  92. fmt.Println("VOICE error sending init packet:", err)
  93. return
  94. }
  95. // Start a listening for voice websocket events
  96. // TODO add a check here to make sure Listen worked by monitoring
  97. // a chan or bool?
  98. v.close = make(chan struct{})
  99. go v.wsListen(v.wsConn, v.close)
  100. return
  101. }
  102. // wsListen listens on the voice websocket for messages and passes them
  103. // to the voice event handler. This is automatically called by the Open func
  104. func (v *Voice) wsListen(wsConn *websocket.Conn, close <-chan struct{}) {
  105. for {
  106. messageType, message, err := v.wsConn.ReadMessage()
  107. if err != nil {
  108. // TODO: add reconnect, matching wsapi.go:listen()
  109. // TODO: Handle this problem better.
  110. // TODO: needs proper logging
  111. fmt.Println("Voice Listen Error:", err)
  112. return
  113. }
  114. // Pass received message to voice event handler
  115. select {
  116. case <-close:
  117. return
  118. default:
  119. go v.wsEvent(messageType, message)
  120. }
  121. }
  122. }
  123. // wsEvent handles any voice websocket events. This is only called by the
  124. // wsListen() function.
  125. func (v *Voice) wsEvent(messageType int, message []byte) {
  126. if v.Debug {
  127. fmt.Println("wsEvent received: ", messageType)
  128. printJSON(message)
  129. }
  130. var e Event
  131. if err := json.Unmarshal(message, &e); err != nil {
  132. fmt.Println("wsEvent Unmarshall error: ", err)
  133. return
  134. }
  135. switch e.Operation {
  136. case 2: // READY
  137. v.OP2 = &voiceOP2{}
  138. if err := json.Unmarshal(e.RawData, v.OP2); err != nil {
  139. fmt.Println("voiceWS.onEvent OP2 Unmarshall error: ", err)
  140. printJSON(e.RawData) // TODO: Better error logging
  141. return
  142. }
  143. // Start the voice websocket heartbeat to keep the connection alive
  144. go v.wsHeartbeat(v.wsConn, v.close, v.OP2.HeartbeatInterval)
  145. // TODO monitor a chan/bool to verify this was successful
  146. // Start the UDP connection
  147. err := v.udpOpen()
  148. if err != nil {
  149. fmt.Println("Error opening udp connection: ", err)
  150. return
  151. }
  152. // Start the opusSender.
  153. // TODO: Should we allow 48000/960 values to be user defined?
  154. if v.OpusSend == nil {
  155. v.OpusSend = make(chan []byte, 2)
  156. }
  157. go v.opusSender(v.UDPConn, v.close, v.OpusSend, 48000, 960)
  158. // Start the opusReceiver
  159. if v.OpusRecv == nil {
  160. v.OpusRecv = make(chan *Packet, 2)
  161. }
  162. go v.opusReceiver(v.UDPConn, v.close, v.OpusRecv)
  163. return
  164. case 3: // HEARTBEAT response
  165. // add code to use this to track latency?
  166. return
  167. case 4: // udp encryption secret key
  168. v.op4 = voiceOP4{}
  169. if err := json.Unmarshal(e.RawData, &v.op4); err != nil {
  170. fmt.Println("voiceWS.onEvent OP4 Unmarshall error: ", err)
  171. printJSON(e.RawData)
  172. return
  173. }
  174. return
  175. case 5:
  176. // SPEAKING TRUE/FALSE NOTIFICATION
  177. /*
  178. {
  179. "user_id": "1238921738912",
  180. "ssrc": 2,
  181. "speaking": false
  182. }
  183. */
  184. default:
  185. fmt.Println("UNKNOWN VOICE OP: ", e.Operation)
  186. printJSON(e.RawData)
  187. }
  188. return
  189. }
// A voiceHeartbeatOp is the heartbeat message wsHeartbeat sends on the
// voice websocket.
type voiceHeartbeatOp struct {
	Op   int `json:"op"` // Always 3
	Data int `json:"d"`  // wsHeartbeat sends the current Unix time in seconds
}
  194. // NOTE :: When a guild voice server changes how do we shut this down
  195. // properly, so a new connection can be setup without fuss?
  196. //
  197. // wsHeartbeat sends regular heartbeats to voice Discord so it knows the client
  198. // is still connected. If you do not send these heartbeats Discord will
  199. // disconnect the websocket connection after a few seconds.
  200. func (v *Voice) wsHeartbeat(wsConn *websocket.Conn, close <-chan struct{}, i time.Duration) {
  201. if close == nil || wsConn == nil {
  202. return
  203. }
  204. var err error
  205. ticker := time.NewTicker(i * time.Millisecond)
  206. for {
  207. err = wsConn.WriteJSON(voiceHeartbeatOp{3, int(time.Now().Unix())})
  208. if err != nil {
  209. fmt.Println("wsHeartbeat send error: ", err)
  210. return
  211. }
  212. select {
  213. case <-ticker.C:
  214. // continue loop and send heartbeat
  215. case <-close:
  216. return
  217. }
  218. }
  219. }
// A voiceSpeakingData is the payload of the speaking notification sent by
// Speaking. Speaking fills it positionally, so the field order must not
// change.
type voiceSpeakingData struct {
	Speaking bool `json:"speaking"`
	Delay    int  `json:"delay"` // Speaking always sends 0
}
// A voiceSpeakingOp is the envelope for the speaking notification sent by
// Speaking on the voice websocket.
type voiceSpeakingOp struct {
	Op   int               `json:"op"` // Always 5
	Data voiceSpeakingData `json:"d"`
}
  228. // Speaking sends a speaking notification to Discord over the voice websocket.
  229. // This must be sent as true prior to sending audio and should be set to false
  230. // once finished sending audio.
  231. // b : Send true if speaking, false if not.
  232. func (v *Voice) Speaking(b bool) (err error) {
  233. if v.wsConn == nil {
  234. return fmt.Errorf("No Voice websocket.")
  235. }
  236. data := voiceSpeakingOp{5, voiceSpeakingData{b, 0}}
  237. err = v.wsConn.WriteJSON(data)
  238. if err != nil {
  239. fmt.Println("Speaking() write json error:", err)
  240. return
  241. }
  242. return
  243. }
  244. // ------------------------------------------------------------------------------------------------
  245. // Code related to the Voice UDP connection
  246. // ------------------------------------------------------------------------------------------------
// A voiceUDPData carries the address, port and encryption mode that udpOpen
// reports back to Discord once the UDP handshake reply has been parsed.
// udpOpen fills it positionally, so the field order must not change.
type voiceUDPData struct {
	Address string `json:"address"` // Public IP of machine running this code
	Port    uint16 `json:"port"`    // UDP Port of machine running this code
	Mode    string `json:"mode"`    // always "xsalsa20_poly1305"
}
// A voiceUDPD is the "d" payload of the op 1 message sent by udpOpen: the
// protocol name plus the connection details.
type voiceUDPD struct {
	Protocol string       `json:"protocol"` // Always "udp" ?
	Data     voiceUDPData `json:"data"`
}
// A voiceUDPOp is the envelope for the op 1 message udpOpen writes to the
// voice websocket to finish the UDP handshake.
type voiceUDPOp struct {
	Op   int       `json:"op"` // Always 1
	Data voiceUDPD `json:"d"`
}
  260. // udpOpen opens a UDP connection to the voice server and completes the
  261. // initial required handshake. This connection is left open in the session
  262. // and can be used to send or receive audio. This should only be called
  263. // from voice.wsEvent OP2
  264. func (v *Voice) udpOpen() (err error) {
  265. v.Lock()
  266. defer v.Unlock()
  267. if v.wsConn == nil {
  268. return fmt.Errorf("nil voice websocket")
  269. }
  270. if v.UDPConn != nil {
  271. return fmt.Errorf("udp connection already open")
  272. }
  273. if v.close == nil {
  274. return fmt.Errorf("nil close channel")
  275. }
  276. if v.endpoint == "" {
  277. return fmt.Errorf("empty endpoint")
  278. }
  279. host := fmt.Sprintf("%s:%d", strings.TrimSuffix(v.endpoint, ":80"), v.OP2.Port)
  280. addr, err := net.ResolveUDPAddr("udp", host)
  281. if err != nil {
  282. fmt.Println("udpOpen resolve addr error: ", err)
  283. // TODO better logging
  284. return
  285. }
  286. v.UDPConn, err = net.DialUDP("udp", nil, addr)
  287. if err != nil {
  288. fmt.Println("udpOpen dial udp error: ", err)
  289. // TODO better logging
  290. return
  291. }
  292. // Create a 70 byte array and put the SSRC code from the Op 2 Voice event
  293. // into it. Then send that over the UDP connection to Discord
  294. sb := make([]byte, 70)
  295. binary.BigEndian.PutUint32(sb, v.OP2.SSRC)
  296. _, err = v.UDPConn.Write(sb)
  297. if err != nil {
  298. fmt.Println("udpOpen udp write error : ", err)
  299. // TODO better logging
  300. return
  301. }
  302. // Create a 70 byte array and listen for the initial handshake response
  303. // from Discord. Once we get it parse the IP and PORT information out
  304. // of the response. This should be our public IP and PORT as Discord
  305. // saw us.
  306. rb := make([]byte, 70)
  307. rlen, _, err := v.UDPConn.ReadFromUDP(rb)
  308. if err != nil {
  309. fmt.Println("udpOpen udp read error : ", err)
  310. // TODO better logging
  311. return
  312. }
  313. if rlen < 70 {
  314. fmt.Println("Voice RLEN should be 70 but isn't")
  315. }
  316. // Loop over position 4 though 20 to grab the IP address
  317. // Should never be beyond position 20.
  318. var ip string
  319. for i := 4; i < 20; i++ {
  320. if rb[i] == 0 {
  321. break
  322. }
  323. ip += string(rb[i])
  324. }
  325. // Grab port from position 68 and 69
  326. port := binary.LittleEndian.Uint16(rb[68:70])
  327. // Take the data from above and send it back to Discord to finalize
  328. // the UDP connection handshake.
  329. data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, "xsalsa20_poly1305"}}}
  330. err = v.wsConn.WriteJSON(data)
  331. if err != nil {
  332. fmt.Println("udpOpen write json error:", err)
  333. return
  334. }
  335. // start udpKeepAlive
  336. go v.udpKeepAlive(v.UDPConn, v.close, 5*time.Second)
  337. // TODO: find a way to check that it fired off okay
  338. return
  339. }
  340. // udpKeepAlive sends a udp packet to keep the udp connection open
  341. // This is still a bit of a "proof of concept"
  342. func (v *Voice) udpKeepAlive(UDPConn *net.UDPConn, close <-chan struct{}, i time.Duration) {
  343. if UDPConn == nil || close == nil {
  344. return
  345. }
  346. var err error
  347. var sequence uint64
  348. packet := make([]byte, 8)
  349. ticker := time.NewTicker(i)
  350. for {
  351. binary.LittleEndian.PutUint64(packet, sequence)
  352. sequence++
  353. _, err = UDPConn.Write(packet)
  354. if err != nil {
  355. fmt.Println("udpKeepAlive udp write error : ", err)
  356. return
  357. }
  358. select {
  359. case <-ticker.C:
  360. // continue loop and send keepalive
  361. case <-close:
  362. return
  363. }
  364. }
  365. }
  366. // opusSender will listen on the given channel and send any
  367. // pre-encoded opus audio to Discord. Supposedly.
  368. func (v *Voice) opusSender(UDPConn *net.UDPConn, close <-chan struct{}, opus <-chan []byte, rate, size int) {
  369. if UDPConn == nil || close == nil {
  370. return
  371. }
  372. runtime.LockOSThread()
  373. // Voice is now ready to receive audio packets
  374. // TODO: this needs reviewed as I think there must be a better way.
  375. v.Ready = true
  376. defer func() { v.Ready = false }()
  377. var sequence uint16
  378. var timestamp uint32
  379. var recvbuf []byte
  380. var ok bool
  381. udpHeader := make([]byte, 12)
  382. var nonce [24]byte
  383. // build the parts that don't change in the udpHeader
  384. udpHeader[0] = 0x80
  385. udpHeader[1] = 0x78
  386. binary.BigEndian.PutUint32(udpHeader[8:], v.OP2.SSRC)
  387. // start a send loop that loops until buf chan is closed
  388. ticker := time.NewTicker(time.Millisecond * time.Duration(size/(rate/1000)))
  389. for {
  390. // Get data from chan. If chan is closed, return.
  391. select {
  392. case <-close:
  393. return
  394. case recvbuf, ok = <-opus:
  395. if !ok {
  396. return
  397. }
  398. // else, continue loop
  399. }
  400. // Add sequence and timestamp to udpPacket
  401. binary.BigEndian.PutUint16(udpHeader[2:], sequence)
  402. binary.BigEndian.PutUint32(udpHeader[4:], timestamp)
  403. // encrypt the opus data
  404. copy(nonce[:], udpHeader)
  405. sendbuf := secretbox.Seal(udpHeader, recvbuf, &nonce, &v.op4.SecretKey)
  406. // block here until we're exactly at the right time :)
  407. // Then send rtp audio packet to Discord over UDP
  408. select {
  409. case <-close:
  410. return
  411. case <-ticker.C:
  412. // continue
  413. }
  414. _, err := UDPConn.Write(sendbuf)
  415. if err != nil {
  416. fmt.Println("error writing to udp connection: ", err)
  417. return
  418. }
  419. if (sequence) == 0xFFFF {
  420. sequence = 0
  421. } else {
  422. sequence++
  423. }
  424. if (timestamp + uint32(size)) >= 0xFFFFFFFF {
  425. timestamp = 0
  426. } else {
  427. timestamp += uint32(size)
  428. }
  429. }
  430. }
// A Packet contains the headers and content of a received voice packet.
//
// opusReceiver fills SSRC, Sequence, Timestamp and Type from the first 12
// bytes of the datagram and Opus from the decrypted remainder. Nothing in
// this file sets PCM.
type Packet struct {
	SSRC      uint32 // bytes 8-11 of the header, big endian
	Sequence  uint16 // bytes 2-3 of the header, big endian
	Timestamp uint32 // bytes 4-7 of the header, big endian
	Type      []byte // first two bytes of the header
	Opus      []byte // decrypted payload
	PCM       []int16
}
  440. // opusReceiver listens on the UDP socket for incoming packets
  441. // and sends them across the given channel
  442. // NOTE :: This function may change names later.
  443. func (v *Voice) opusReceiver(UDPConn *net.UDPConn, close <-chan struct{}, c chan *Packet) {
  444. if UDPConn == nil || close == nil {
  445. return
  446. }
  447. p := Packet{}
  448. recvbuf := make([]byte, 1024)
  449. var nonce [24]byte
  450. for {
  451. rlen, err := UDPConn.Read(recvbuf)
  452. if err != nil {
  453. fmt.Println("opusReceiver UDP Read error:", err)
  454. return
  455. }
  456. select {
  457. case <-close:
  458. return
  459. default:
  460. // continue loop
  461. }
  462. // For now, skip anything except audio.
  463. if rlen < 12 || recvbuf[0] != 0x80 {
  464. continue
  465. }
  466. // build a audio packet struct
  467. p.Type = recvbuf[0:2]
  468. p.Sequence = binary.BigEndian.Uint16(recvbuf[2:4])
  469. p.Timestamp = binary.BigEndian.Uint32(recvbuf[4:8])
  470. p.SSRC = binary.BigEndian.Uint32(recvbuf[8:12])
  471. // decrypt opus data
  472. copy(nonce[:], recvbuf[0:12])
  473. p.Opus, _ = secretbox.Open(nil, recvbuf[12:rlen], &nonce, &v.op4.SecretKey)
  474. if c != nil {
  475. c <- &p
  476. }
  477. }
  478. }
  479. // Close closes the voice ws and udp connections
  480. func (v *Voice) Close() {
  481. v.Lock()
  482. defer v.Unlock()
  483. v.Ready = false
  484. if v.close != nil {
  485. close(v.close)
  486. v.close = nil
  487. }
  488. if v.UDPConn != nil {
  489. err := v.UDPConn.Close()
  490. if err != nil {
  491. fmt.Println("error closing udp connection: ", err)
  492. }
  493. v.UDPConn = nil
  494. }
  495. if v.wsConn != nil {
  496. err := v.wsConn.Close()
  497. if err != nil {
  498. fmt.Println("error closing websocket connection: ", err)
  499. }
  500. v.wsConn = nil
  501. }
  502. }