Create & Init Project...

2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions


@@ -0,0 +1,48 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"group.go",
],
importpath = "go-common/library/queue/databus/databusutil",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/queue/databus:go_default_library",
"//library/time:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["group_test.go"],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
deps = [
"//library/log:go_default_library",
"//library/queue/databus:go_default_library",
"//library/sync/errgroup:go_default_library",
"//library/time:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -0,0 +1,100 @@
/*
Package databusutil provides a utility for building databus-based async jobs
with single-partition message aggregation and parallel consumption.
Group
The Group is the primary struct for working with this package.
Applications create groups by calling the package-level NewGroup function with
a databusutil config and a databus message chan.
To start an initialized group, the application must call the group's Start
method. The application must call the group's Close method when it is done
with the group.
Callbacks
After a new group is created, the callbacks New, Split and Do must be
assigned; otherwise the job will not work as expected.
The callback New defines how the group's consume proc parses the target
object from a newly received databus message before merging. If the returned
error is not nil, the consume proc skips the message and continues.
An example of the callback New:
func newTestMsg(msg *databus.Message) (res interface{}, err error) {
res = new(testMsg)
if err = json.Unmarshal(msg.Value, &res); err != nil {
log.Error("json.Unmarshal(%s) error(%v)", msg.Value, err)
}
return
}
The callback Split defines how the consume proc derives the sharding
dimension from a databus message, or from the object parsed from it. Together
with the configuration item Num, it decides which merge goroutine merges the
parsed object: if Split returns sr, the chosen goroutine index is sr % Num.
An example of the callback Split:
func split(msg *databus.Message, data interface{}) int {
t, ok := data.(*testMsg)
if !ok {
return 0
}
return int(t.Mid)
}
If your messages are already assigned to partitions matching the split you
want, you may use the partition directly as the split:
func anotherSplit(msg *databus.Message, data interface{}) int {
return int(msg.Partition)
}
Make sure the values your callback Split returns cover every residue modulo
the configuration item Num; if Split returns a contiguous range starting at 0,
its maximum value, call it maxSplit, must be at least Num - 1. Otherwise the
merge goroutines will not be fully used: the last (Num - 1 - maxSplit) merge
goroutines are started but never receive a message. For example, with Num = 8
and a Split that only ever returns 0..3, merge goroutines 4..7 stay idle.
The callback Do defines how the group's merge proc processes the merged
objects; put your business logic in it.
An example of the callback Do:
func do(msgs []interface{}) {
for _, m := range msgs {
// process the merged messages here; this example type-asserts and prints each
if msg, ok := m.(*testMsg); ok {
fmt.Printf("msg: %+v", msg)
}
}
}
Usage Example
The typical usage for databusutil is:
// create a databus to subscribe from
dsSub := databus.New(dsSubConf)
defer dsSub.Close()
// create a group
g := NewGroup(
c,
dsSub.Messages(),
)
// fill callbacks
g.New = yourNewFunc
g.Split = yourSplitFunc
g.Do = yourDoFunc
// start the group
g.Start()
// must close the group before the job exits
defer g.Close()
// signal handler
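	// e.g. a minimal sketch, assuming the job should exit on SIGINT/SIGTERM;
	// adapt the signal set to your application
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	<-ch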
*/
package databusutil


@@ -0,0 +1,223 @@
package databusutil
import (
"runtime"
"sync"
"time"
"go-common/library/queue/databus"
xtime "go-common/library/time"
)
const (
_stateStarted = 1
_stateClosed = 2
)
// Config is the base configuration for creating a new Group.
type Config struct {
// Size is the max number of messages merged into one batch.
Size int
// Num is the number of merge goroutines.
Num int
// Ticker is the interval at which pending merges are flushed when no new message arrives.
Ticker xtime.Duration
// Chan is the buffer size of each merge chan and of the done chan.
Chan int
}
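// An illustrative Config (example values, not tuned recommendations):
//
//	c := &Config{
//		Size:   200,                         // flush a merge batch at 200 messages
//		Num:    8,                           // 8 parallel merge goroutines
//		Ticker: xtime.Duration(time.Second), // flush at least once per second
//		Chan:   1024,                        // buffer size of merge/done chans
//	}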
func (c *Config) fix() {
if c.Size <= 0 {
c.Size = 1024
}
if int64(c.Ticker) <= 0 {
c.Ticker = xtime.Duration(time.Second * 5)
}
if c.Num <= 0 {
c.Num = runtime.GOMAXPROCS(0)
}
if c.Chan <= 0 {
c.Chan = 1024
}
}
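// message wraps a raw databus message and its parsed object; messages are
// chained into a linked list so offsets can be committed in arrival order.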
type message struct {
next *message
data *databus.Message
object interface{}
done bool
}
// Group aggregates messages from a databus chan and consumes them with sharded merging and parallel processing.
type Group struct {
c *Config
head, last *message
state int
mu sync.Mutex
mc []chan *message // merge chan
dc chan []*message // done chan
qc chan struct{} // quit chan
msg <-chan *databus.Message
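// New parses the target object to merge from a raw databus message.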
New func(msg *databus.Message) (interface{}, error)
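// Split derives the sharding key of a message; the merge goroutine index is the result modulo Config.Num.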
Split func(msg *databus.Message, data interface{}) int
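// Do processes a batch of merged objects; business logic goes here.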
Do func(msgs []interface{})
pool *sync.Pool
}
// NewGroup creates a new group.
func NewGroup(c *Config, m <-chan *databus.Message) *Group {
// NOTE: m must not be nil; a nil c falls back to the default config.
if c == nil {
c = new(Config)
}
c.fix()
g := &Group{
c: c,
msg: m,
mc: make([]chan *message, c.Num),
dc: make(chan []*message, c.Chan),
qc: make(chan struct{}),
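// pool recycles message wrappers to cut allocations on the hot path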
pool: &sync.Pool{
New: func() interface{} {
return new(message)
},
},
}
for i := 0; i < c.Num; i++ {
g.mc[i] = make(chan *message, c.Chan)
}
return g
}
// Start starts the group. It is safe for concurrent use by multiple goroutines.
func (g *Group) Start() {
g.mu.Lock()
if g.state == _stateStarted {
g.mu.Unlock()
return
}
g.state = _stateStarted
g.mu.Unlock()
go g.consumeproc()
for i := 0; i < g.c.Num; i++ {
go g.mergeproc(g.mc[i])
}
go g.commitproc()
}
// Close closes the group. It is safe for concurrent use by multiple goroutines.
func (g *Group) Close() (err error) {
g.mu.Lock()
if g.state == _stateClosed {
g.mu.Unlock()
return
}
g.state = _stateClosed
g.mu.Unlock()
close(g.qc)
return
}
func (g *Group) message() *message {
return g.pool.Get().(*message)
}
func (g *Group) freeMessage(m *message) {
*m = message{}
g.pool.Put(m)
}
func (g *Group) consumeproc() {
var (
ok bool
err error
msg *databus.Message
)
for {
select {
case <-g.qc:
return
case msg, ok = <-g.msg:
if !ok {
g.Close()
return
}
}
// append to the linked list; head marks the first message pending commit
m := g.message()
m.data = msg
if m.object, err = g.New(msg); err != nil {
g.freeMessage(m)
continue
}
g.mu.Lock()
if g.head == nil {
g.head = m
g.last = m
} else {
g.last.next = m
g.last = m
}
g.mu.Unlock()
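// dispatch to the merge goroutine chosen by Split; this send blocks when
// the merge chan is full, which backpressures consumption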
g.mc[g.Split(m.data, m.object)%g.c.Num] <- m
}
}
func (g *Group) mergeproc(mc <-chan *message) {
ticker := time.NewTicker(time.Duration(g.c.Ticker))
defer ticker.Stop()
msgs := make([]interface{}, 0, g.c.Size)
marks := make([]*message, 0, g.c.Size)
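// accumulate messages until the batch reaches Size or the ticker fires,
// then hand the batch to Do and forward the marks for offset commit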
for {
select {
case <-g.qc:
return
case msg := <-mc:
msgs = append(msgs, msg.object)
marks = append(marks, msg)
if len(msgs) < g.c.Size {
continue
}
case <-ticker.C:
}
if len(msgs) > 0 {
g.Do(msgs)
msgs = make([]interface{}, 0, g.c.Size)
}
if len(marks) > 0 {
g.dc <- marks
marks = make([]*message, 0, g.c.Size)
}
}
}
func (g *Group) commitproc() {
commits := make(map[int32]*databus.Message)
for {
select {
case <-g.qc:
return
case done := <-g.dc:
// mark messages done, then commit the latest offset per partition
for _, d := range done {
d.done = true
}
g.mu.Lock()
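// advance head only across the contiguous prefix of done messages so
// offsets are committed in arrival order even though batches finish
// out of order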
for g.head != nil && g.head.done {
cur := g.head
commits[cur.data.Partition] = cur.data
g.head = cur.next
g.freeMessage(cur)
}
g.mu.Unlock()
for k, m := range commits {
m.Commit()
delete(commits, k)
}
}
}
}


@@ -0,0 +1,392 @@
package databusutil
import (
"context"
"encoding/json"
"runtime"
"strconv"
"sync"
"testing"
"time"
"go-common/library/log"
"go-common/library/queue/databus"
"go-common/library/sync/errgroup"
xtime "go-common/library/time"
)
type testMsg struct {
Seq int64 `json:"seq"`
Mid int64 `json:"mid"`
Now int64 `json:"now"`
}
var (
_sendSeqsList = make([][]int64, _groupNum)
_recvSeqsList = make([][]int64, _groupNum)
_sMus = make([]sync.Mutex, _groupNum)
_rMus = make([]sync.Mutex, _groupNum)
_groupNum = 8
_tc = 20
_ts = time.Now().Unix()
_st = _ts - _ts%10 + 1000
_ed = _st + int64(_groupNum*_tc) - 1
_dsPubConf = &databus.Config{
Key: "0PvKGhAqDvsK7zitmS8t",
Secret: "0PvKGhAqDvsK7zitmS8u",
Group: "databus_test_group",
Topic: "databus_test_topic",
Action: "pub",
Name: "databus",
Proto: "tcp",
Addr: "172.16.33.158:6205",
Active: 1,
Idle: 1,
DialTimeout: xtime.Duration(time.Second),
WriteTimeout: xtime.Duration(time.Second),
ReadTimeout: xtime.Duration(time.Second),
IdleTimeout: xtime.Duration(time.Minute),
}
_dsSubConf = &databus.Config{
Key: "0PvKGhAqDvsK7zitmS8t",
Secret: "0PvKGhAqDvsK7zitmS8u",
Group: "databus_test_group",
Topic: "databus_test_topic",
Action: "sub",
Name: "databus",
Proto: "tcp",
Addr: "172.16.33.158:6205",
Active: 1,
Idle: 1,
DialTimeout: xtime.Duration(time.Second),
WriteTimeout: xtime.Duration(time.Second),
ReadTimeout: xtime.Duration(time.Second * 35),
IdleTimeout: xtime.Duration(time.Minute),
}
)
func TestGroup(t *testing.T) {
for i := 0; i < _groupNum; i++ {
_sendSeqsList[i] = make([]int64, 0)
_recvSeqsList[i] = make([]int64, 0)
}
taskCounts := taskCount(_groupNum, _st, _ed)
runtime.GOMAXPROCS(32)
log.Init(&log.Config{
Dir: "/data/log/queue",
})
c := &Config{
Size: 200,
Ticker: xtime.Duration(time.Second),
Num: _groupNum,
Chan: 1024,
}
dsSub := databus.New(_dsSubConf)
defer dsSub.Close()
group := NewGroup(
c,
dsSub.Messages(),
)
group.New = newTestMsg
group.Split = split
group.Do = do
eg, _ := errgroup.WithContext(context.Background())
// go produce test messages
eg.Go(func() error {
send(_st, _ed)
return nil
})
// go consume test messages
eg.Go(func() error {
group.Start()
defer group.Close()
m := make(map[int]struct{})
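// wait until every proc has processed all of its messages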
for len(m) < _groupNum {
for i := 0; i < _groupNum; i++ {
_, ok := m[i]
if ok {
continue
}
_rMus[i].Lock()
if len(_recvSeqsList[i]) == taskCounts[i] {
m[i] = struct{}{}
}
_rMus[i].Unlock()
log.Info("_recvSeqsList[%d] length: %d, expect: %d", i, len(_recvSeqsList[i]), taskCounts[i])
}
log.Info("m length: %d", len(m))
time.Sleep(time.Millisecond * 500)
}
// check seq lists; sendSeqsList and recvSeqsList will not change from now on, so no locking is needed
for num := 0; num < _groupNum; num++ {
sendSeqs := _sendSeqsList[num]
recvSeqs := _recvSeqsList[num]
if len(sendSeqs) != taskCounts[num] {
t.Errorf("sendSeqs length of proc %d is incorrect, expcted %d but got %d", num, taskCounts[num], len(sendSeqs))
t.FailNow()
}
if len(recvSeqs) != taskCounts[num] {
t.Errorf("recvSeqs length of proc %d is incorrect, expcted %d but got %d", num, taskCounts[num], len(recvSeqs))
t.FailNow()
}
for i := range recvSeqs {
if recvSeqs[i] != sendSeqs[i] {
t.Errorf("res is incorrect for proc %d, expcted recvSeqs[%d] equal to sendSeqs[%d] but not, recvSeqs[%d]: %d, sendSeqs[%d]: %d", num, i, i, i, recvSeqs[i], i, sendSeqs[i])
t.FailNow()
}
}
t.Logf("proc %d processed %d messages, expected %d messages, check ok", num, taskCounts[num], len(recvSeqs))
}
return nil
})
eg.Wait()
}
func do(msgs []interface{}) {
for _, m := range msgs {
if msg, ok := m.(*testMsg); ok {
shard := int(msg.Mid) % _groupNum
if msg.Seq < _st {
log.Info("proc %d processed old seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
continue
}
_rMus[shard].Lock()
_recvSeqsList[shard] = append(_recvSeqsList[shard], msg.Seq)
_rMus[shard].Unlock()
log.Info("proc %d processed seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
}
}
}
func send(st, ed int64) error {
dsPub := databus.New(_dsPubConf)
defer dsPub.Close()
ts := time.Now().Unix()
for i := st; i <= ed; i++ {
mid := int64(i)
seq := i
k := _dsPubConf.Topic + strconv.FormatInt(mid, 10)
n := &testMsg{
Seq: seq,
Mid: mid,
Now: ts,
}
dsPub.Send(context.TODO(), k, n)
// NOTE: sleep here to avoid messages arriving out of order due to network latency
time.Sleep(time.Millisecond * 500)
shard := int(mid) % _groupNum
_sMus[shard].Lock()
_sendSeqsList[shard] = append(_sendSeqsList[shard], seq)
_sMus[shard].Unlock()
}
return nil
}
func newTestMsg(msg *databus.Message) (res interface{}, err error) {
res = new(testMsg)
if err = json.Unmarshal(msg.Value, &res); err != nil {
log.Error("json.Unmarshal(%s) error(%v)", msg.Value, err)
}
return
}
func split(msg *databus.Message, data interface{}) int {
t, ok := data.(*testMsg)
if !ok {
return 0
}
return int(t.Mid)
}
func taskCount(num int, st, ed int64) []int {
res := make([]int, num)
for i := st; i <= ed; i++ {
res[int(i)%num]++
}
return res
}
func TestTaskCount(t *testing.T) {
groupNum := 10
c := 100
ts := time.Now().Unix()
st := ts - ts%10 + 1000
ed := st + int64(groupNum*c) - 1
res := taskCount(groupNum, st, ed)
for i, v := range res {
if v != c {
t.Errorf("res is incorrect, expected task count 10 for proc %d but got %d", i, v)
t.FailNow()
}
t.Logf("i: %d, v: %d", i, v)
}
}
var (
_bGroupNum = 3
_bSendSeqsList = make([][]int64, _bGroupNum)
_bRecvSeqsList = make([][]int64, _bGroupNum)
_bSMus = make([]sync.Mutex, _bGroupNum)
_bRMus = make([]sync.Mutex, _bGroupNum)
_bTc = 20
_bTs = time.Now().Unix()
_bSt = _bTs - _bTs%10 + 1000
_bEd = _bSt + int64(_bGroupNum*_bTc) - 1
_bTaskCounts = taskCount(_bGroupNum, _bSt, _bEd)
_blockDo = true
_blockDoMu sync.Mutex
_blocked = false
)
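// TestGroup_Blocking checks the backpressure path: while the Do callback
// blocks, the merge and done chans fill up until consumeproc itself blocks,
// and consumption resumes once Do is unblocked.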
func TestGroup_Blocking(t *testing.T) {
for i := 0; i < _bGroupNum; i++ {
_bSendSeqsList[i] = make([]int64, 0)
_bRecvSeqsList[i] = make([]int64, 0)
}
runtime.GOMAXPROCS(32)
log.Init(&log.Config{
Dir: "/data/log/queue",
})
c := &Config{
Size: 20,
Ticker: xtime.Duration(time.Second),
Num: _bGroupNum,
Chan: 5,
}
dsSub := databus.New(_dsSubConf)
defer dsSub.Close()
g := NewGroup(
c,
dsSub.Messages(),
)
g.New = newTestMsg
g.Split = split
g.Do = func(msgs []interface{}) {
blockingDo(t, g, msgs)
}
eg, _ := errgroup.WithContext(context.Background())
// go produce test messages
eg.Go(func() error {
dsPub := databus.New(_dsPubConf)
defer dsPub.Close()
ts := time.Now().Unix()
for i := _bSt; i <= _bEd; i++ {
mid := int64(i)
seq := i
k := _dsPubConf.Topic + strconv.FormatInt(mid, 10)
n := &testMsg{
Seq: seq,
Mid: mid,
Now: ts,
}
dsPub.Send(context.TODO(), k, n)
// NOTE: sleep here to avoid messages arriving out of order due to network latency
time.Sleep(time.Millisecond * 500)
shard := int(mid) % _bGroupNum
_bSMus[shard].Lock()
_bSendSeqsList[shard] = append(_bSendSeqsList[shard], seq)
_bSMus[shard].Unlock()
}
return nil
})
// go consume test messages
eg.Go(func() error {
g.Start()
defer g.Close()
m := make(map[int]struct{})
// wait until all procs have processed their messages
for len(m) < _bGroupNum {
for i := 0; i < _bGroupNum; i++ {
_, ok := m[i]
if ok {
continue
}
_bRMus[i].Lock()
if len(_bRecvSeqsList[i]) == _bTaskCounts[i] {
m[i] = struct{}{}
}
_bRMus[i].Unlock()
log.Info("_bRecvSeqsList[%d] length: %d, expect: %d, blockDo: %t", i, len(_bRecvSeqsList[i]), _bTaskCounts[i], _blockDo)
}
log.Info("m length: %d", len(m))
time.Sleep(time.Millisecond * 500)
}
return nil
})
eg.Wait()
}
func blockingDo(t *testing.T, g *Group, msgs []interface{}) {
_blockDoMu.Lock()
if !_blockDo {
_blockDoMu.Unlock()
processMsg(msgs)
return
}
// block here to check whether consumeproc eventually blocks too
lastGLen := 0
cnt := 0
for i := 0; i < 60; i++ {
// print seq status; no lock since the lists are quiescent while Do blocks
for i, v := range _bRecvSeqsList {
log.Info("_bRecvSeqsList[%d] length: %d, expect: %d", i, len(v), _bTaskCounts[i])
}
gLen := 0
for h := g.head; h != nil; h = h.next {
gLen++
}
if gLen == lastGLen {
cnt++
} else {
cnt = 0
}
lastGLen = gLen
log.Info("blocking test: gLen: %d, cnt: %d, _bSt: %d, _bEd: %d", gLen, cnt, _bSt, _bEd)
if cnt == 5 {
_blocked = true
log.Info("blocking test: consumeproc now is blocked, now trying to unblocking do callback")
break
}
time.Sleep(time.Millisecond * 500)
}
// assert blocked
if !_blocked {
t.Errorf("res is incorrect, _blocked should be true but got false")
t.FailNow()
}
// unblock and check whether consumeproc unblocks too
_blockDo = false
_blockDoMu.Unlock()
processMsg(msgs)
}
func processMsg(msgs []interface{}) {
for _, m := range msgs {
if msg, ok := m.(*testMsg); ok {
shard := int(msg.Mid) % _bGroupNum
if msg.Seq < _bSt {
log.Info("proc %d processed old seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
continue
}
_bRMus[shard].Lock()
_bRecvSeqsList[shard] = append(_bRecvSeqsList[shard], msg.Seq)
log.Info("appended: %d", msg.Seq)
_bRMus[shard].Unlock()
log.Info("proc %d processed seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
}
}
}