Create & Init Project...

commit fc4fa37393
2019-04-22 18:49:16 +08:00
25440 changed files with 4054998 additions and 0 deletions

@@ -0,0 +1,61 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_test",
"go_library",
)
go_test(
name = "go_default_test",
srcs = [
"conn_test.go",
"pubsub_test.go",
"tcp_test.go",
],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
deps = [
"//app/infra/databus/conf:go_default_library",
"//vendor/github.com/smartystreets/goconvey/convey:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"conn.go",
"pubsub.go",
"tcp.go",
],
importpath = "go-common/app/infra/databus/tcp",
tags = ["automanaged"],
deps = [
"//app/infra/databus/conf:go_default_library",
"//app/infra/databus/dsn:go_default_library",
"//app/infra/databus/model:go_default_library",
"//app/infra/databus/service:go_default_library",
"//library/conf/env:go_default_library",
"//library/log:go_default_library",
"//library/queue/databus:go_default_library",
"//vendor/github.com/Shopify/sarama:go_default_library",
"//vendor/github.com/bsm/sarama-cluster:go_default_library",
"//vendor/github.com/rcrowley/go-metrics:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

@@ -0,0 +1,261 @@
package tcp
import (
"bufio"
"errors"
"fmt"
"io"
"net"
"strconv"
"strings"
"time"
)
type conn struct {
// conn
conn net.Conn
// Read
readTimeout time.Duration
br *bufio.Reader
// Write
writeTimeout time.Duration
bw *bufio.Writer
// Scratch space for formatting argument length.
// '*' or '$', length, "\r\n"
lenScratch [32]byte
// Scratch space for formatting integers and floats.
numScratch [40]byte
}
// newConn returns a new connection for the given net connection.
func newConn(netConn net.Conn, readTimeout, writeTimeout time.Duration) *conn {
return &conn{
conn: netConn,
readTimeout: readTimeout,
writeTimeout: writeTimeout,
br: bufio.NewReaderSize(netConn, _readBufSize),
bw: bufio.NewWriterSize(netConn, _writeBufSize),
}
}
// Read reads a command and its arguments from the connection
func (c *conn) Read() (cmd string, args [][]byte, err error) {
var (
ln, cn int
bs []byte
)
if c.readTimeout > 0 {
c.conn.SetReadDeadline(time.Now().Add(c.readTimeout))
}
// start read
if bs, err = c.readLine(); err != nil {
return
}
if len(bs) < 2 {
err = fmt.Errorf("read error data(%s) from connection", bs)
return
}
// a command without any params may be received, such as: QUIT
if strings.ToLower(string(bs)) == _quit {
cmd = _quit
return
}
// get param number
if ln, err = parseLen(bs[1:]); err != nil {
return
}
args = make([][]byte, 0, ln-1)
for i := 0; i < ln; i++ {
if cn, err = c.readLen(_protoBulk); err != nil {
return
}
if bs, err = c.readData(cn); err != nil {
return
}
if i == 0 {
cmd = strings.ToLower(string(bs))
continue
}
args = append(args, bs)
}
return
}
// WriteError writes an error reply to the connection and closes it
func (c *conn) WriteError(err error) {
if c.writeTimeout > 0 {
c.conn.SetWriteDeadline(time.Now().Add(c.writeTimeout))
}
if err = c.Write(proto{prefix: _protoErr, message: err.Error()}); err != nil {
c.Close()
return
}
c.Flush()
c.Close()
}
// Write writes a reply to the connection buffer
func (c *conn) Write(p proto) (err error) {
if c.writeTimeout > 0 {
c.conn.SetWriteDeadline(time.Now().Add(c.writeTimeout))
}
// start write
switch p.prefix {
case _protoStr:
err = c.writeStatus(p.message)
case _protoErr:
err = c.writeError(p.message)
case _protoInt:
err = c.writeInt64(int64(p.integer))
case _protoBulk:
// c.writeString(p.message)
err = c.writeBytes([]byte(p.message))
case _protoArray:
err = c.writeLen(p.prefix, p.integer)
}
return
}
// Flush flushes the write buffer to the connection
func (c *conn) Flush() error {
if c.writeTimeout > 0 {
c.conn.SetWriteDeadline(time.Now().Add(c.writeTimeout))
}
return c.bw.Flush()
}
// Close closes the underlying connection
func (c *conn) Close() error {
return c.conn.Close()
}
// parseLen parses bulk string and array lengths.
func parseLen(p []byte) (int, error) {
if len(p) == 0 {
return -1, errors.New("malformed length")
}
if p[0] == '-' && len(p) == 2 && p[1] == '1' {
// handle $-1 and *-1 null replies.
return -1, nil
}
var n int
for _, b := range p {
n *= 10
if b < '0' || b > '9' {
return -1, errors.New("illegal bytes in length")
}
n += int(b - '0')
}
return n, nil
}
func (c *conn) readLine() ([]byte, error) {
p, err := c.br.ReadBytes('\n')
if err == bufio.ErrBufferFull {
return nil, errors.New("long response line")
}
if err != nil {
return nil, err
}
i := len(p) - 2
if i < 0 || p[i] != '\r' {
return nil, errors.New("bad response line terminator")
}
return p[:i], nil
}
func (c *conn) readLen(prefix byte) (int, error) {
ls, err := c.readLine()
if err != nil {
return 0, err
}
if len(ls) < 2 {
return 0, errors.New("illegal bytes in length")
}
if ls[0] != prefix {
return 0, errors.New("illegal bytes in length")
}
return parseLen(ls[1:])
}
func (c *conn) readData(n int) ([]byte, error) {
if n > _maxValueSize {
return nil, errors.New("exceeding max value limit")
}
buf := make([]byte, n+2)
r, err := io.ReadFull(c.br, buf)
if err != nil {
return nil, err
}
if n != r-2 {
return nil, errors.New("invalid bytes in len")
}
return buf[:n], err
}
func (c *conn) writeLen(prefix byte, n int) error {
c.lenScratch[len(c.lenScratch)-1] = '\n'
c.lenScratch[len(c.lenScratch)-2] = '\r'
i := len(c.lenScratch) - 3
for {
c.lenScratch[i] = byte('0' + n%10)
i--
n = n / 10
if n == 0 {
break
}
}
c.lenScratch[i] = prefix
_, err := c.bw.Write(c.lenScratch[i:])
return err
}
func (c *conn) writeStatus(s string) (err error) {
c.bw.WriteByte(_protoStr)
c.bw.WriteString(s)
_, err = c.bw.WriteString("\r\n")
return
}
func (c *conn) writeError(s string) (err error) {
c.bw.WriteByte(_protoErr)
c.bw.WriteString(s)
_, err = c.bw.WriteString("\r\n")
return
}
func (c *conn) writeInt64(n int64) (err error) {
c.bw.WriteByte(_protoInt)
c.bw.Write(strconv.AppendInt(c.numScratch[:0], n, 10))
_, err = c.bw.WriteString("\r\n")
return
}
func (c *conn) writeString(s string) (err error) {
c.writeLen(_protoBulk, len(s))
c.bw.WriteString(s)
_, err = c.bw.WriteString("\r\n")
return
}
func (c *conn) writeBytes(s []byte) (err error) {
if len(s) == 0 {
c.bw.WriteByte('$')
c.bw.Write(_nullBulk)
} else {
c.writeLen(_protoBulk, len(s))
c.bw.Write(s)
}
_, err = c.bw.WriteString("\r\n")
return
}
func (c *conn) writeStrings(ss []string) (err error) {
c.writeLen(_protoArray, len(ss))
for _, s := range ss {
if err = c.writeString(s); err != nil {
return
}
}
return
}

@@ -0,0 +1,66 @@
package tcp
import (
"bytes"
"fmt"
"net"
"testing"
"time"
. "github.com/smartystreets/goconvey/convey"
)
type connMock struct {
}
func (c *connMock) Read(b []byte) (n int, err error) {
buf := []byte("*3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nmyvalue\r\n")
copy(b, buf)
return len(buf), nil
}
func (c *connMock) Write(b []byte) (n int, err error) {
t := []byte{_protoStr}
t = append(t, []byte(_ok)...)
t = append(t, []byte("\r\n")...)
if !bytes.Equal(b, t) {
return 0, fmt.Errorf("%s not equal %s", b, t)
}
return len(b), nil
}
func (c *connMock) Close() error {
return nil
}
func (c *connMock) LocalAddr() net.Addr {
return nil
}
func (c *connMock) RemoteAddr() net.Addr {
return nil
}
func (c *connMock) SetDeadline(t time.Time) error {
return nil
}
func (c *connMock) SetReadDeadline(t time.Time) error {
return nil
}
func (c *connMock) SetWriteDeadline(t time.Time) error {
return nil
}
func TestConn(t *testing.T) {
Convey("test conn:", t, func() {
connMock := &connMock{}
conn := newConn(connMock, time.Second, time.Second)
// write
p := proto{prefix: _protoStr, message: _ok}
err := conn.Write(p)
So(err, ShouldBeNil)
err = conn.Flush()
So(err, ShouldBeNil)
// read
cmd, args, err := conn.Read()
So(err, ShouldBeNil)
So(cmd, ShouldEqual, "set")
So(string(args[0]), ShouldEqual, "mykey")
So(string(args[1]), ShouldEqual, "myvalue")
})
}

@@ -0,0 +1,604 @@
package tcp
import (
"bytes"
"encoding/json"
"fmt"
"io"
"strconv"
"time"
"unicode"
"go-common/app/infra/databus/conf"
"go-common/library/conf/env"
"go-common/library/log"
"go-common/library/queue/databus"
"github.com/Shopify/sarama"
cluster "github.com/bsm/sarama-cluster"
pb "github.com/gogo/protobuf/proto"
)
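// stringify replaces symbol and control runes with '-' so that message payloads can be logged safely.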
func stringify(b []byte) []byte {
return bytes.Map(
func(r rune) rune {
if unicode.IsSymbol(r) || unicode.IsControl(r) {
return rune('-')
}
return r
},
b,
)
}
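// proto is a single unit of the redis reply protocol: a type prefix plus an integer and/or message payload.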
type proto struct {
prefix byte
integer int
message string
}
// psCommon holds the state shared by Pub and Sub
type psCommon struct {
c *conn
err error
closed bool
// kafka
group string
topic string
cluster string
addr string
color []byte
}
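// newPsCommon builds the shared pub/sub state; the remote address is recorded when a connection is supplied.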
func newPsCommon(c *conn, group, topic, color, cluster string) (ps *psCommon) {
ps = &psCommon{
c: c,
group: group,
topic: topic,
cluster: cluster,
color: []byte(color),
}
if c != nil {
ps.addr = c.conn.RemoteAddr().String()
}
return
}
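// write writes the given protos to the connection and flushes the buffer.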
func (ps *psCommon) write(protos ...proto) (err error) {
for _, p := range protos {
if err = ps.c.Write(p); err != nil {
return
}
}
err = ps.c.Flush()
return
}
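// batchWrite writes the protos as one redis array reply: an array header followed by each element, then a flush.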
func (ps *psCommon) batchWrite(protos []proto) (err error) {
if err = ps.c.Write(proto{prefix: _protoArray, integer: len(protos)}); err != nil {
return
}
for _, p := range protos {
if err = ps.c.Write(p); err != nil {
return
}
// FIXME(felix): logging skipped in prod for now because of ops-log performance issues
if env.DeployEnv != env.DeployEnvProd {
log.Info("batchWrite group(%s) topic(%s) cluster(%s) color(%s) addr(%s) consumer(%s) ok", ps.group, ps.topic, ps.cluster, ps.color, ps.addr, stringify([]byte(p.message)))
}
}
err = ps.c.Flush()
return
}
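// pong replies to a PING command with +PONG.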
func (ps *psCommon) pong() (err error) {
if err = ps.write(proto{prefix: _protoStr, message: _pong}); err != nil {
return
}
log.Info("pong group(%s) topic(%s) cluster(%s) color(%s) addr(%s) ping success", ps.group, ps.topic, ps.cluster, ps.color, ps.addr)
return
}
func (ps *psCommon) Closed() bool {
return ps.closed
}
// Close is too tightly coupled with the redis protocol, so the sendRedisErr switch controls whether the error is written back
func (ps *psCommon) Close(sendRedisErr bool) {
if ps.closed {
return
}
if ps.err == nil {
ps.err = errConnClosedByServer // when closed by self, send close event to client.
}
// write error
if ps.err != errConnRead && ps.err != errConnClosedByClient && sendRedisErr {
ps.write(proto{prefix: _protoErr, message: ps.err.Error()})
}
if ps.c != nil {
ps.c.Close()
}
ps.closed = true
}
func (ps *psCommon) fatal(err error) {
if err == nil || ps.closed {
return
}
ps.err = err
ps.Close(true)
}
// Pub databus producer
type Pub struct {
*psCommon
// producer
producer sarama.SyncProducer
}
// NewPub new databus producer
// the http interface reuses this method; pass nil for c
func NewPub(c *conn, group, topic, color string, pCfg *conf.Kafka) (p *Pub, err error) {
producer, err := newProducer(group, topic, pCfg)
if err != nil {
log.Error("group(%s) topic(%s) cluster(%s) NewPub producer error(%v)", group, topic, pCfg.Cluster, err)
return
}
p = &Pub{
psCommon: newPsCommon(c, group, topic, color, pCfg.Cluster),
producer: producer,
}
// http protocol connections (c == nil) are left untouched
if c != nil {
// set producer read connection timeout
p.c.readTimeout = _pubReadTimeout
}
log.Info("NewPub() success group(%s) topic(%s) color(%s) cluster(%s) addr(%s)", group, topic, color, pCfg.Cluster, p.addr)
return
}
// Serve databus producer goroutine
func (p *Pub) Serve() {
var (
err error
cmd string
args [][]byte
)
for {
if cmd, args, err = p.c.Read(); err != nil {
if err != io.EOF {
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) read error(%v)", p.group, p.topic, p.cluster, p.color, p.addr, err)
}
p.fatal(errConnRead)
return
}
if p.Closed() {
log.Warn("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) p.Closed()", p.group, p.topic, p.cluster, p.color, p.addr)
return
}
select {
case <-quit:
p.fatal(errConnClosedByServer)
return
default:
}
switch cmd {
case _auth:
err = p.write(proto{prefix: _protoStr, message: _ok})
case _ping:
err = p.pong()
case _set:
if len(args) != 2 {
p.write(proto{prefix: _protoErr, message: errPubParams.Error()})
continue
}
err = p.publish(args[0], nil, args[1])
case _hset:
if len(args) != 3 {
p.write(proto{prefix: _protoErr, message: errPubParams.Error()})
continue
}
err = p.publish(args[0], args[1], args[2])
case _quit:
err = errConnClosedByClient
default:
err = errCmdNotSupport
}
if err != nil {
p.fatal(err)
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) serve error(%v)", p.group, p.topic, p.cluster, p.color, p.addr, p.err)
return
}
}
}
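// publish sends the message to kafka and replies +OK to the client on success.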
func (p *Pub) publish(key, header, value []byte) (err error) {
if _, _, err = p.Publish(key, header, value); err != nil {
return
}
return p.write(proto{prefix: _protoStr, message: _ok})
}
// Publish sends a message to kafka; shared by the redis and http protocols
func (p *Pub) Publish(key, header, value []byte) (partition int32, offset int64, err error) {
var message = &sarama.ProducerMessage{
Topic: p.topic,
Key: sarama.ByteEncoder(key),
Value: sarama.ByteEncoder(value),
Headers: []sarama.RecordHeader{
{Key: _headerColor, Value: p.color},
{Key: _headerMetadata, Value: header},
},
}
now := time.Now()
// TODO(felix): support RecordHeader
if partition, offset, err = p.producer.SendMessage(message); err != nil {
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) publish(%v) error(%v)", p.group, p.topic, p.cluster, p.color, p.addr, message, err)
return
}
if svc != nil {
svc.TimeProm.Timing(p.group, int64(time.Since(now)/time.Millisecond))
svc.CountProm.Incr(_opProducerMsgSpeed, p.group, p.topic)
}
// FIXME(felix): logging skipped in prod for now because of ops-log performance issues
if env.DeployEnv != env.DeployEnvProd {
log.Info("publish group(%s) topic(%s) cluster(%s) color(%s) addr(%s) key(%s) header(%s) value(%s) ok", p.group, p.topic, p.cluster, p.color, p.addr, key, stringify(header), stringify(value))
}
return
}
// Sub databus consumer
type Sub struct {
*psCommon
// kafka consumer
consumer *cluster.Consumer
waitClosing bool
batch int
// ticker
ticker *time.Ticker
}
// NewSub new databus consumer
func NewSub(c *conn, group, topic, color string, sCfg *conf.Kafka, batch int64) (s *Sub, err error) {
select {
case <-consumerLimter:
default:
}
// NOTE color is used to filter traffic-dyed (colored) messages during consumption
if color != "" {
group = fmt.Sprintf("%s-%s", group, color)
}
if err = validate(group, topic, sCfg.Brokers); err != nil {
return
}
s = &Sub{
psCommon: newPsCommon(c, group, topic, color, sCfg.Cluster),
ticker: time.NewTicker(_batchInterval),
}
if batch == 0 {
s.batch = _batchNum
} else {
s.batch = int(batch)
}
// set consumer read connection timeout
s.c.readTimeout = _subReadTimeout
// cluster config
cfg := cluster.NewConfig()
cfg.Version = sarama.V1_0_0_0
cfg.ClientID = fmt.Sprintf("%s-%s", group, topic)
cfg.Net.KeepAlive = 30 * time.Second
// NOTE cluster auto commit offset interval
cfg.Consumer.Offsets.CommitInterval = time.Second * 1
// NOTE set fetch.wait.max.ms
cfg.Consumer.MaxWaitTime = time.Millisecond * 250
cfg.Consumer.MaxProcessingTime = 50 * time.Millisecond
// NOTE errors that occur during offset management; if enabled, the c.Errors channel must be read
cfg.Consumer.Return.Errors = true
// NOTE notifications that occur during rebalancing; if enabled, the c.Notifications channel must be read
cfg.Group.Return.Notifications = true
// The initial offset to use if no offset was previously committed.
// default: OffsetOldest
cfg.Consumer.Offsets.Initial = sarama.OffsetNewest
if s.consumer, err = cluster.NewConsumer(sCfg.Brokers, group, []string{topic}, cfg); err != nil {
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) cluster.NewConsumer() error(%v)", s.group, s.topic, s.cluster, s.color, s.addr, err)
} else {
log.Info("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) cluster.NewConsumer() ok", s.group, s.topic, s.cluster, s.color, s.addr)
}
return
}
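// validate checks, via a temporary cluster client, that the consumer group does not already have one consumer per partition of the topic; if it does, a new consumer would be useless.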
func validate(group, topic string, brokers []string) (err error) {
var (
cli *cluster.Client
c *cluster.Config
broker *sarama.Broker
gresp *sarama.DescribeGroupsResponse
memberAssignment *sarama.ConsumerGroupMemberAssignment
consumerNum int
partitions []int32
)
c = cluster.NewConfig()
c.Version = sarama.V0_10_0_1
if cli, err = cluster.NewClient(brokers, c); err != nil {
log.Error("group(%s) topic(%s) cluster.NewClient() error(%v)", group, topic, err)
err = errKafKaData
return
}
defer cli.Close()
if partitions, err = cli.Partitions(topic); err != nil {
log.Error("group(%s) topic(%s) cli.Partitions error(%v)", group, topic, err)
err = errKafKaData
return
}
if len(partitions) <= 0 {
err = errKafKaData
return
}
if err = cli.RefreshCoordinator(group); err != nil {
log.Error("group(%s) topic(%s) cli.RefreshCoordinator error(%v)", group, topic, err)
err = errKafKaData
return
}
if broker, err = cli.Coordinator(group); err != nil {
log.Error("group(%s) topic(%s) cli.Coordinator error(%v)", group, topic, err)
err = errKafKaData
return
}
defer broker.Close()
if gresp, err = broker.DescribeGroups(&sarama.DescribeGroupsRequest{
Groups: []string{group},
}); err != nil {
log.Error("group(%s) topic(%s) cli.DescribeGroups error(%v)", group, topic, err)
err = errKafKaData
return
}
if len(gresp.Groups) != 1 {
err = errKafKaData
return
}
for _, member := range gresp.Groups[0].Members {
if memberAssignment, err = member.GetMemberAssignment(); err != nil {
log.Error("group(%s) topic(%s) member.GetMemberAssignment error(%v)", group, topic, err)
err = errKafKaData
return
}
for mtopic := range memberAssignment.Topics {
if mtopic == topic {
consumerNum++
break
}
}
}
if consumerNum >= len(partitions) {
err = errUseLessConsumer
return
}
return nil
}
// Serve databus consumer goroutine
func (s *Sub) Serve() {
var (
err error
cmd string
args [][]byte
)
defer func() {
svc.CountProm.Decr(_opCurrentConsumer, s.group, s.topic)
}()
log.Info("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) begin serve", s.group, s.topic, s.cluster, s.color, s.addr)
for {
if cmd, args, err = s.c.Read(); err != nil {
if err != io.EOF {
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) read error(%v)", s.group, s.topic, s.cluster, s.color, s.addr, err)
}
s.fatal(errConnRead)
return
}
if s.consumer == nil {
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) s.consumer is nil", s.group, s.topic, s.cluster, s.color, s.addr)
s.fatal(errConsumerClosed)
return
}
if s.Closed() {
log.Warn("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) s.Closed()", s.group, s.topic, s.cluster, s.color, s.addr)
return
}
switch cmd {
case _auth:
err = s.write(proto{prefix: _protoStr, message: _ok})
case _ping:
err = s.pong()
case _mget:
var enc []byte
if len(args) > 0 {
enc = args[0]
}
err = s.message(enc)
case _set:
err = s.commit(args)
case _quit:
err = errConnClosedByClient
default:
err = errCmdNotSupport
}
if err != nil {
s.fatal(err)
log.Error("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) serve error(%v)", s.group, s.topic, s.cluster, s.color, s.addr, err)
return
}
}
}
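// message consumes kafka messages for an MGET request: it filters by the color header, decodes the metadata header, marshals each message to pb or json and batch-writes the result to the client.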
func (s *Sub) message(enc []byte) (err error) {
var (
msg *sarama.ConsumerMessage
notify *cluster.Notification
protos []proto
ok bool
bs []byte
last = time.Now()
ret = &databus.MessagePB{}
p = proto{prefix: _protoBulk}
)
for {
select {
case err = <-s.consumer.Errors():
log.Error("group(%s) topic(%s) cluster(%s) addr(%s) catch error(%v)", s.group, s.topic, s.cluster, s.addr, err)
return
case notify, ok = <-s.consumer.Notifications():
if !ok {
log.Info("notification notOk group(%s) topic(%s) cluster(%s) addr(%s) catch error(%v)", s.group, s.topic, s.cluster, s.addr, err)
err = errClosedNotifyChannel
return
}
switch notify.Type {
case cluster.UnknownNotification, cluster.RebalanceError:
log.Error("notification(%s) group(%s) topic(%s) cluster(%s) addr(%s) catch error(%v)", notify.Type, s.group, s.topic, s.cluster, s.addr, err)
err = errClosedNotifyChannel
return
case cluster.RebalanceStart:
log.Info("notification(%s) group(%s) topic(%s) cluster(%s) addr(%s) catch error(%v)", notify.Type, s.group, s.topic, s.cluster, s.addr, err)
continue
case cluster.RebalanceOK:
log.Info("notification(%s) group(%s) topic(%s) cluster(%s) addr(%s) catch error(%v)", notify.Type, s.group, s.topic, s.cluster, s.addr, err)
}
if len(notify.Current[s.topic]) == 0 {
log.Warn("notification(%s) no topic group(%s) topic(%s) cluster(%s) addr(%s) catch error(%v)", notify.Type, s.group, s.topic, s.cluster, s.addr, err)
err = errConsumerOver
return
}
case msg, ok = <-s.consumer.Messages():
if !ok {
log.Error("group(%s) topic(%s) cluster(%s) addr(%s) message channel closed", s.group, s.topic, s.cluster, s.addr)
err = errClosedMsgChannel
return
}
// reset timestamp
last = time.Now()
ret.Key = string(msg.Key)
ret.Value = msg.Value
ret.Topic = s.topic
ret.Partition = msg.Partition
ret.Offset = msg.Offset
ret.Timestamp = msg.Timestamp.Unix()
if len(msg.Headers) > 0 {
var notMatchColor bool
for _, h := range msg.Headers {
if bytes.Equal(h.Key, _headerColor) && !bytes.Equal(h.Value, s.color) {
// color header present but does not match this consumer's color
notMatchColor = true
} else if bytes.Equal(h.Key, _headerMetadata) && h.Value != nil {
// parse metadata
dh := new(databus.Header)
if err = pb.Unmarshal(h.Value, dh); err != nil {
log.Error("pb.Unmarshal(%s) error(%v)", h.Value, err)
err = nil
} else {
ret.Metadata = dh.Metadata
}
}
}
if notMatchColor {
continue
}
}
if bytes.Equal(enc, _encodePB) {
// encode to pb bytes
if bs, err = pb.Marshal(ret); err != nil {
log.Error("proto.Marshal(%v) error(%v)", ret, err)
s.consumer.MarkPartitionOffset(s.topic, msg.Partition, msg.Offset, "")
return s.write(proto{prefix: _protoErr, message: errMsgFormat.Error()})
}
} else {
// encode to json bytes
if bs, err = json.Marshal(ret); err != nil {
log.Error("json.Marshal(%v) error(%v)", ret, err)
s.consumer.MarkPartitionOffset(s.topic, msg.Partition, msg.Offset, "")
return s.write(proto{prefix: _protoErr, message: errMsgFormat.Error()})
}
}
svc.StatProm.State(_opPartitionOffset, msg.Offset, s.group, s.topic, strconv.Itoa(int(msg.Partition)))
svc.CountProm.Incr(_opConsumerMsgSpeed, s.group, s.topic)
svc.StatProm.Incr(_opConsumerPartition, s.group, s.topic, strconv.Itoa(int(msg.Partition)))
p.message = string(bs)
protos = append(protos, p)
if len(protos) >= s.batch {
return s.batchWrite(protos)
}
case <-s.ticker.C:
if len(protos) != 0 {
return s.batchWrite(protos)
}
if time.Since(last) < _batchTimeout {
continue
}
if s.waitClosing {
log.Info("consumer group(%s) topic(%s) cluster(%s) addr(%s) wait closing then exit,maybe cluster changed", s.group, s.topic, s.cluster, s.addr)
err = errConsumerTimeout
return
}
return s.batchWrite(protos)
}
}
}
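// commit parses the partition and offset arguments of a SET command and marks the offset on the consumer.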
func (s *Sub) commit(args [][]byte) (err error) {
var (
partition, offset int64
)
if len(args) != 2 {
log.Error("group(%v) topic(%v) cluster(%s) addr(%s) commit offset error, args(%v) is illegal", s.group, s.topic, s.cluster, s.addr, args)
// write error
return s.write(proto{prefix: _protoErr, message: errCommitParams.Error()})
}
if partition, err = strconv.ParseInt(string(args[0]), 10, 32); err != nil {
return s.write(proto{prefix: _protoErr, message: errCommitParams.Error()})
}
if offset, err = strconv.ParseInt(string(args[1]), 10, 64); err != nil {
return s.write(proto{prefix: _protoErr, message: errCommitParams.Error()})
}
// mark partition offset
s.consumer.MarkPartitionOffset(s.topic, int32(partition), offset, "")
// FIXME(felix): logging skipped in prod for now because of ops-log performance issues
if env.DeployEnv != env.DeployEnvProd {
log.Info("commit group(%s) topic(%s) cluster(%s) color(%s) addr(%s) partition(%d) offset(%d) mark offset succeed", s.group, s.topic, s.cluster, s.color, s.addr, partition, offset)
}
return s.write(proto{prefix: _protoStr, message: _ok})
}
// Closed reports whether the consumer is closed
func (s *Sub) Closed() bool {
return s.psCommon != nil && s.psCommon.Closed()
}
// Close closes the consumer and releases its resources
func (s *Sub) Close() {
if !s.psCommon.Closed() {
s.psCommon.Close(true)
}
if s.consumer != nil {
s.consumer.Close()
s.consumer = nil
}
if s.ticker != nil {
s.ticker.Stop()
}
log.Info("group(%s) topic(%s) cluster(%s) color(%s) addr(%s) consumer exit", s.group, s.topic, s.cluster, s.color, s.addr)
}
// WaitClosing marks the consumer as closing; it exits once it has been idle for the 30s batch timeout.
func (s *Sub) WaitClosing() {
s.waitClosing = true
}
func (s *Sub) fatal(err error) {
if err == nil || s.closed {
return
}
if s.psCommon != nil {
s.psCommon.fatal(err)
}
s.Close()
}

@@ -0,0 +1,70 @@
package tcp
import (
"fmt"
"io/ioutil"
"net"
"testing"
"time"
"go-common/app/infra/databus/conf"
. "github.com/smartystreets/goconvey/convey"
)
var (
pubCfg = &conf.Kafka{
Cluster: "test_topic",
Brokers: []string{"172.22.33.174:9092", "172.22.33.183:9092", "172.22.33.185:9092"},
}
)
func TestDatabus(t *testing.T) {
Convey("Test publish:", t, func() {
l, _ := net.Listen("tcp", ":8888")
go func() {
for {
conn, err := l.Accept()
if err != nil {
continue
}
b, err := ioutil.ReadAll(conn)
if err == nil {
fmt.Printf("test conn: %s", b)
}
conn.Close()
}
}()
conn, err := net.Dial("tcp", ":8888")
So(err, ShouldBeNil)
p, err := NewPub(newConn(conn, time.Second, time.Second), "pub", _testTopic, "", pubCfg)
So(err, ShouldBeNil)
key := []byte("key")
header := []byte("header")
msg := []byte("message")
err = p.publish(key, header, msg)
So(err, ShouldBeNil)
time.Sleep(time.Second)
Convey("test sub", func() {
conn, _ := net.Dial("tcp", ":8888")
s, err := NewSub(newConn(conn, time.Second, time.Second), "sub", _testTopic, "", pubCfg, 1)
So(err, ShouldBeNil)
t.Logf("subscriptions: %v", s.consumer.Subscriptions())
for {
select {
case msg := <-s.consumer.Messages():
s.consumer.CommitOffsets()
t.Logf("sub message: %s timestamp: %d", msg.Value, msg.Timestamp.Unix())
return
case err := <-s.consumer.Errors():
t.Errorf("error: %v", err)
So(err, ShouldBeNil)
case n := <-s.consumer.Notifications():
t.Logf("notify: %v", n)
err := p.publish(key, header, msg)
So(err, ShouldBeNil)
}
}
})
})
}

@@ -0,0 +1,439 @@
package tcp
import (
"errors"
lg "log"
"net"
"os"
"sync"
"time"
"go-common/app/infra/databus/conf"
"go-common/app/infra/databus/dsn"
"go-common/app/infra/databus/model"
"go-common/app/infra/databus/service"
"go-common/library/log"
"github.com/Shopify/sarama"
metrics "github.com/rcrowley/go-metrics"
)
const (
// redis proto
_protoStr = '+'
_protoErr = '-'
_protoInt = ':'
_protoBulk = '$'
_protoArray = '*'
// redis cmd
_ping = "ping"
_auth = "auth"
_quit = "quit"
_set = "set"
_hset = "hset"
_mget = "mget"
_ok = "OK"
_pong = "PONG"
// client role
_rolePub = "pub"
_roleSub = "sub"
_listenDelay = 5 * time.Millisecond // how long to sleep on accept failure
_clearDelay = 30 * time.Second
_batchNum = 100 // batch write message length
_batchInterval = 100 * time.Millisecond // batch write interval
_batchTimeout = 30 * time.Second // return empty if timeout
// connection timeout
_readTimeout = 5 * time.Second
_writeTimeout = 5 * time.Second
_pubReadTimeout = 20 * time.Minute
_subReadTimeout = _batchTimeout + 10*time.Second
// conn read buffer size 64K
_readBufSize = 1024 * 64
// conn write buffer size 8K
_writeBufSize = 1024 * 8
// conn max value size(kafka 1M)
_maxValueSize = 1000000
// prom operation
_opAddConsumerRequest = "request_add_comsuner"
_opCurrentConsumer = "current_consumer"
_opAddProducerRequest = "request_add_producer"
_opAuthError = "auth_error"
_opProducerMsgSpeed = "producer_msg_speed"
_opConsumerMsgSpeed = "consumer_msg_speed"
_opConsumerPartition = "consumer_partition_speed"
_opPartitionOffset = "consumer_partition_offset"
)
var (
_nullBulk = []byte("-1")
// kafka header
_headerColor = []byte("color")
_headerMetadata = []byte("metadata")
// encode type pb/json
_encodePB = []byte("pb")
)
var (
errCmdAuthFailed = errors.New("auth failed")
errAuthInfo = errors.New("auth info error")
errPubParams = errors.New("pub params error")
errCmdNotSupport = errors.New("command not support")
errClusterNotExist = errors.New("cluster not exist")
errClusterNotSupport = errors.New("cluster not support")
errConnClosedByServer = errors.New("connection closed by databus")
errConnClosedByClient = errors.New("connection closed by client")
errClosedMsgChannel = errors.New("message channel is closed")
errClosedNotifyChannel = errors.New("notification channel is closed")
errNoPubPermission = errors.New("no publish permission")
errNoSubPermission = errors.New("no subscribe permission")
errConsumerClosed = errors.New("kafka consumer closed")
errCommitParams = errors.New("commit offset params error")
errMsgFormat = errors.New("message format must be json")
errConsumerOver = errors.New("too many consumers")
errConsumerTimeout = errors.New("consumer initial timeout")
errConnRead = errors.New("connection read error")
errUseLessConsumer = errors.New("useless consumer")
errKafKaData = errors.New("err kafka data maybe rebalancing")
errConsumerCreateLimiter = errors.New("err consumer create limiter")
)
var (
// tcp listener
listener net.Listener
quit = make(chan struct{})
// producer cache, key: cluster:group:topic
producers = make(map[string]sarama.SyncProducer)
pLock sync.RWMutex
// Pubs
pubs = make(map[*Pub]struct{})
pubLock sync.RWMutex
// Subs
subs = make(map[*Sub]struct{})
subLock sync.RWMutex
// service for auth
svc *service.Service
// limiter
consumerLimter = make(chan struct{}, 100)
)
// Init starts the tcp listener and the background goroutines
func Init(c *conf.Config, s *service.Service) {
var err error
if listener, err = net.Listen("tcp", c.Addr); err != nil {
panic(err)
}
// the sarama logger should be set, otherwise errors caught by sarama are silently dropped
sarama.Logger = lg.New(os.Stdout, "[Sarama] ", lg.LstdFlags)
// sarama metrics disable
metrics.UseNilMetrics = true
svc = s
log.Info("start tcp listen addr: %s", c.Addr)
go accept()
go clear()
go clusterproc()
}
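// newProducer returns the cached sync producer for cluster:group:topic, creating and caching a new one if needed.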
func newProducer(group, topic string, pCfg *conf.Kafka) (p sarama.SyncProducer, err error) {
var (
ok bool
key = key(pCfg.Cluster, group, topic)
)
pLock.RLock()
if p, ok = producers[key]; ok {
pLock.RUnlock()
return
}
pLock.RUnlock()
// new
conf := sarama.NewConfig()
conf.Producer.Return.Successes = true
conf.Version = sarama.V1_0_0_0
if p, err = sarama.NewSyncProducer(pCfg.Brokers, conf); err != nil {
log.Error("group(%s) topic(%s) cluster(%s) NewSyncProducer error(%v)", group, topic, pCfg.Cluster, err)
return
}
pLock.Lock()
producers[key] = p
pLock.Unlock()
return
}
// Close closes the listener and all producers and consumers
func Close() {
close(quit)
if listener != nil {
listener.Close()
}
// close all consumers
subLock.RLock()
for sub := range subs {
sub.Close()
}
subLock.RUnlock()
pubLock.RLock()
for pub := range pubs {
pub.Close(true)
}
pubLock.RUnlock()
pLock.RLock()
for _, p := range producers {
p.Close()
}
pLock.RUnlock()
}
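// accept accepts tcp connections until the listener is closed, retrying temporary errors, and serves each connection in its own goroutine.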
func accept() {
var (
err error
ok bool
netC net.Conn
netE net.Error
)
for {
if netC, err = listener.Accept(); err != nil {
if netE, ok = err.(net.Error); ok && netE.Temporary() {
log.Error("tcp: Accept error: %v; retrying in %v", err, _listenDelay)
time.Sleep(_listenDelay)
continue
}
return
}
select {
case <-quit:
netC.Close()
return
default:
}
go serveConn(netC)
}
}
// serveConn serves a tcp connection.
func serveConn(nc net.Conn) {
var (
err error
p *Pub
s *Sub
d *dsn.DSN
cfg *conf.Kafka
batch int64
addr = nc.RemoteAddr().String()
)
c := newConn(nc, _readTimeout, _writeTimeout)
if d, cfg, batch, err = auth(c); err != nil {
log.Error("auth failed addr(%s) error(%v)", addr, err)
c.WriteError(err)
return
}
// auth succeed
if err = c.Write(proto{prefix: _protoStr, message: _ok}); err != nil {
log.Error("c.Write() error(%v)", err)
c.Close()
return
}
if err = c.Flush(); err != nil {
c.Close()
return
}
log.Info("auth succeed group(%s) topic(%s) color(%s) cluster(%s) addr(%s) role(%s)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, d.Role)
// command
switch d.Role {
case _rolePub: // producer
svc.CountProm.Incr(_opAddProducerRequest, d.Group, d.Topic)
if p, err = NewPub(c, d.Group, d.Topic, d.Color, cfg); err != nil {
c.WriteError(err)
log.Error("group(%s) topic(%s) color(%s) cluster(%s) addr(%s) NewPub error(%v)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, err)
return
}
pubLock.Lock()
pubs[p] = struct{}{}
pubLock.Unlock()
p.Serve()
case _roleSub: // consumer
svc.CountProm.Incr(_opAddConsumerRequest, d.Group, d.Topic)
select {
case consumerLimter <- struct{}{}:
default:
err = errConsumerCreateLimiter
c.WriteError(err)
log.Error("group(%s) topic(%s) color(%s) cluster(%s) addr(%s) error(%v)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, err)
return
}
if s, err = NewSub(c, d.Group, d.Topic, d.Color, cfg, batch); err != nil {
c.WriteError(err)
log.Error("group(%s) topic(%s) color(%s) cluster(%s) addr(%s) NewSub error(%v)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, err)
return
}
subLock.Lock()
subs[s] = struct{}{}
subLock.Unlock()
s.Serve()
svc.CountProm.Incr(_opCurrentConsumer, d.Group, d.Topic)
default:
// no other role can reach here; auth has already checked it.
}
}
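// auth reads the first command from the connection, which must be AUTH with a single dsn argument, parses the dsn and delegates to Auth.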
func auth(c *conn) (d *dsn.DSN, cfg *conf.Kafka, batch int64, err error) {
var (
args [][]byte
cmd string
addr = c.conn.RemoteAddr().String()
)
if cmd, args, err = c.Read(); err != nil {
log.Error("c.Read addr(%s) error(%v)", addr, err)
return
}
if cmd != _auth || len(args) != 1 {
log.Error("c.Read addr(%s) first cmd(%s) not auth or have not enough args(%v)", addr, cmd, args)
err = errCmdAuthFailed
return
}
// key:secret@group/topic=?&role=?&offset=?
if d, err = dsn.ParseDSN(string(args[0])); err != nil {
log.Error("auth failed arg(%s) is illegal,addr(%s) error(%v)", args[0], addr, err)
return
}
cfg, batch, err = Auth(d, addr)
return
}
// Auth validates the authentication info and returns the matching kafka config.
// Shared with the http interface; do not perform any IO in this method.
func Auth(d *dsn.DSN, addr string) (cfg *conf.Kafka, batch int64, err error) {
var (
a *model.Auth
ok bool
)
if a, ok = svc.AuthApp(d.Group); !ok {
log.Error("addr(%s) group(%s) cant not be found", addr, d.Group)
svc.CountProm.Incr(_opAuthError, d.Group, d.Topic)
err = errAuthInfo
return
}
batch = a.Batch
if err = a.Auth(d.Group, d.Topic, d.Key, d.Secret); err != nil {
log.Error("a.Auth addr(%s) group(%s) topic(%s) color(%s) key(%s) secret(%s) error(%v)", addr, d.Group, d.Topic, d.Color, d.Key, d.Secret, err)
svc.CountProm.Incr(_opAuthError, d.Group, d.Topic)
return
}
switch d.Role {
case _rolePub:
if !a.CanPub() {
err = errNoPubPermission
return
}
case _roleSub:
if !a.CanSub() {
err = errNoSubPermission
return
}
default:
err = errCmdNotSupport
return
}
if len(conf.Conf.Clusters) == 0 {
err = errClusterNotExist
return
}
if cfg, ok = conf.Conf.Clusters[a.Cluster]; !ok || cfg == nil {
log.Error("a.Auth addr(%s) group(%s) topic(%s) color(%s) key(%s) secret(%s) cluster(%s) not support", addr, d.Group, d.Topic, d.Color, d.Key, d.Secret, a.Cluster)
err = errClusterNotSupport
}
// TODO check ip addr
// rAddr = conn.RemoteAddr().String()
return
}
// ConsumerAddrs returns consumer addrs.
func ConsumerAddrs(group string) (addrs []string, err error) {
subLock.RLock()
for sub := range subs {
if sub.group == group {
addrs = append(addrs, sub.addr)
}
}
subLock.RUnlock()
return
}
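// key builds the producer cache key from cluster, group and topic.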
func key(cluster, group, topic string) string {
return cluster + ":" + group + ":" + topic
}
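// clear periodically removes closed Pubs and Subs from their registries.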
func clear() {
for {
time.Sleep(_clearDelay)
t := time.Now()
log.Info("clear proc start,id(%d)", t.Nanosecond())
subLock.Lock()
for sub := range subs {
if sub.Closed() {
delete(subs, sub)
}
}
subLock.Unlock()
pubLock.Lock()
for pub := range pubs {
if pub.Closed() {
delete(pubs, pub)
}
}
pubLock.Unlock()
log.Info("clear proc end,id(%d) used(%d)", t.Nanosecond(), time.Since(t))
}
}
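// clusterproc handles cluster-change events: it recreates producers for affected pubs and asks affected subs to close.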
func clusterproc() {
for {
oldAuth, ok := <-svc.ClusterEvent()
if !ok {
return
}
log.Info("cluster changed event group(%s) topic(%s) cluster(%s)", oldAuth.Group, oldAuth.Topic, oldAuth.Cluster)
k := key(oldAuth.Cluster, oldAuth.Group, oldAuth.Topic)
pLock.Lock()
if p, ok := producers[k]; ok {
// renew producer
if newAuth, ok := svc.AuthApp(oldAuth.Group); ok {
pLock.Unlock()
np, err := newProducer(newAuth.Group, newAuth.Topic, conf.Conf.Clusters[newAuth.Cluster])
pLock.Lock()
// check pubs
pubLock.Lock()
for pub := range pubs {
if pub.group == oldAuth.Group && pub.topic == oldAuth.Topic {
if err != nil {
pub.Close(true)
} else {
pub.producer = np
}
}
}
pubLock.Unlock()
}
// close unused producer
p.Close()
delete(producers, k)
}
pLock.Unlock()
// wait closing subs
subLock.Lock()
for sub := range subs {
if sub.group == oldAuth.Group && sub.topic == oldAuth.Topic {
sub.WaitClosing()
}
}
subLock.Unlock()
}
}

@@ -0,0 +1,60 @@
package tcp
import (
"net"
"sync"
"testing"
"time"
"go-common/app/infra/databus/conf"
)
var (
_testGroup = "test-consumer-group"
_testTopic = "test_topic"
_testAddr = "172.22.33.174:9092"
_testConfig = &conf.Kafka{
Cluster: _testGroup,
Brokers: []string{_testAddr},
}
)
func TestNewSub(t *testing.T) {
var (
mu sync.Mutex
err error
)
subs := []*Sub{}
c, _ := net.Dial("tcp", _testAddr)
sub, err := NewSub(newConn(c, time.Second, time.Second), _testGroup, _testTopic, "", _testConfig, 100)
if err != nil {
t.Fatal(err)
}
subs = append(subs, sub)
time.Sleep(time.Second * 5)
go func() {
for i := 0; i < 200; i++ {
sub, err := NewSub(newConn(c, time.Second, time.Second), _testGroup, _testTopic, "", _testConfig, 100)
if err != nil {
t.Errorf("NewSub error(%v)", err)
continue
}
mu.Lock()
subs = append(subs, sub)
mu.Unlock()
}
}()
time.Sleep(time.Second * 5)
for i := 0; i < 20; i++ {
sub, err := NewSub(newConn(c, time.Second, time.Second), _testGroup, _testTopic, "", _testConfig, 100)
if err != nil {
t.Fatal(err)
}
mu.Lock()
subs = append(subs, sub)
mu.Unlock()
}
time.Sleep(time.Second * 5)
}