Create & Init Project...

2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions


@@ -0,0 +1,48 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"group.go",
],
importpath = "go-common/library/queue/databus/databusutil",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/queue/databus:go_default_library",
"//library/time:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["group_test.go"],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
deps = [
"//library/log:go_default_library",
"//library/queue/databus:go_default_library",
"//library/sync/errgroup:go_default_library",
"//library/time:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -0,0 +1,100 @@
/*
Package databusutil provides a utility for building databus-based async jobs
with single-partition message aggregation and parallel consumption.
Group
The Group is the primary struct for working with this package.
Applications create groups by calling the package-level NewGroup function with
a databusutil config and a databus message chan.
To start an initialized group, the application must call the group's Start
method. The application must call the group's Close method when it is done
with the group.
Callbacks
After a new group is created, the callbacks New, Split and Do must be
assigned; otherwise the job will not work as expected.
The callback New defines how the group's consume proc parses the target
object from a newly received databus message before merging. If the returned
error is not nil, the consume proc skips the message and continues.
An example of the callback New:
func newTestMsg(msg *databus.Message) (res interface{}, err error) {
res = new(testMsg)
if err = json.Unmarshal(msg.Value, &res); err != nil {
log.Error("json.Unmarshal(%s) error(%v)", msg.Value, err)
}
return
}
The callback Split defines how the consume proc derives the sharding
dimension from a databus message, or from the object parsed from it. Together
with the configuration item Num, it decides which merge goroutine merges the
parsed object: if Split returns sr, the chosen goroutine index is sr % Num.
An example of the callback Split:
func split(msg *databus.Message, data interface{}) int {
t, ok := data.(*testMsg)
if !ok {
return 0
}
return int(t.Mid)
}
If your messages are already assigned to partitions matching the split you
want, you may use the partition directly as the split:
func anotherSplit(msg *databus.Message, data interface{}) int {
return int(msg.Partition)
}
Make sure the values your callback Split returns cover every residue modulo
the configuration item Num; if Split returns a contiguous range starting at 0,
its maximum value, call it maxSplit, must be at least Num - 1. Otherwise the
merge goroutines will not be fully used: the last (Num - 1 - maxSplit) merge
goroutines are started but never receive a message. For example, with Num = 8
and a Split that only ever returns 0..3, merge goroutines 4..7 stay idle.
The callback Do defines how the group's merge proc processes the merged
objects; put your business logic in it.
An example of the callback Do:
func do(msgs []interface{}) {
for _, m := range msgs {
// process the merged messages here; this example type-asserts and prints each
if msg, ok := m.(*testMsg); ok {
fmt.Printf("msg: %+v", msg)
}
}
}
Usage Example
The typical usage for databusutil is:
// create a databus to subscribe from
dsSub := databus.New(dsSubConf)
defer dsSub.Close()
// create a group
g := NewGroup(
c,
dsSub.Messages(),
)
// fill callbacks
g.New = yourNewFunc
g.Split = yourSplitFunc
g.Do = yourDoFunc
// start the group
g.Start()
// must close the group before the job exits
defer g.Close()
// signal handler
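	// e.g. a minimal sketch, assuming the job should exit on SIGINT/SIGTERM;
	// adapt the signal set to your application
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	<-ch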
*/
package databusutil


@@ -0,0 +1,223 @@
package databusutil
import (
"runtime"
"sync"
"time"
"go-common/library/queue/databus"
xtime "go-common/library/time"
)
const (
_stateStarted = 1
_stateClosed = 2
)
// Config is the base configuration for creating a new Group.
type Config struct {
// Size is the max number of messages merged into one batch.
Size int
// Num is the number of merge goroutines.
Num int
// Ticker is the interval at which pending merges are flushed when no new message arrives.
Ticker xtime.Duration
// Chan is the buffer size of each merge chan and of the done chan.
Chan int
}
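// An illustrative Config (example values, not tuned recommendations):
//
//	c := &Config{
//		Size:   200,                         // flush a merge batch at 200 messages
//		Num:    8,                           // 8 parallel merge goroutines
//		Ticker: xtime.Duration(time.Second), // flush at least once per second
//		Chan:   1024,                        // buffer size of merge/done chans
//	}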
func (c *Config) fix() {
if c.Size <= 0 {
c.Size = 1024
}
if int64(c.Ticker) <= 0 {
c.Ticker = xtime.Duration(time.Second * 5)
}
if c.Num <= 0 {
c.Num = runtime.GOMAXPROCS(0)
}
if c.Chan <= 0 {
c.Chan = 1024
}
}
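// message wraps a raw databus message and its parsed object; messages are
// chained into a linked list so offsets can be committed in arrival order.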
type message struct {
next *message
data *databus.Message
object interface{}
done bool
}
// Group aggregates messages from a databus chan and consumes them with sharded merging and parallel processing.
type Group struct {
c *Config
head, last *message
state int
mu sync.Mutex
mc []chan *message // merge chan
dc chan []*message // done chan
qc chan struct{} // quit chan
msg <-chan *databus.Message
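// New parses the target object to merge from a raw databus message.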
New func(msg *databus.Message) (interface{}, error)
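// Split derives the sharding key of a message; the merge goroutine index is the result modulo Config.Num.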
Split func(msg *databus.Message, data interface{}) int
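// Do processes a batch of merged objects; business logic goes here.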
Do func(msgs []interface{})
pool *sync.Pool
}
// NewGroup creates a new group.
func NewGroup(c *Config, m <-chan *databus.Message) *Group {
// NOTE: m must not be nil; a nil c falls back to the default config.
if c == nil {
c = new(Config)
}
c.fix()
g := &Group{
c: c,
msg: m,
mc: make([]chan *message, c.Num),
dc: make(chan []*message, c.Chan),
qc: make(chan struct{}),
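// pool recycles message wrappers to cut allocations on the hot path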
pool: &sync.Pool{
New: func() interface{} {
return new(message)
},
},
}
for i := 0; i < c.Num; i++ {
g.mc[i] = make(chan *message, c.Chan)
}
return g
}
// Start starts the group. It is safe for concurrent use by multiple goroutines.
func (g *Group) Start() {
g.mu.Lock()
if g.state == _stateStarted {
g.mu.Unlock()
return
}
g.state = _stateStarted
g.mu.Unlock()
go g.consumeproc()
for i := 0; i < g.c.Num; i++ {
go g.mergeproc(g.mc[i])
}
go g.commitproc()
}
// Close closes the group. It is safe for concurrent use by multiple goroutines.
func (g *Group) Close() (err error) {
g.mu.Lock()
if g.state == _stateClosed {
g.mu.Unlock()
return
}
g.state = _stateClosed
g.mu.Unlock()
close(g.qc)
return
}
func (g *Group) message() *message {
return g.pool.Get().(*message)
}
func (g *Group) freeMessage(m *message) {
*m = message{}
g.pool.Put(m)
}
func (g *Group) consumeproc() {
var (
ok bool
err error
msg *databus.Message
)
for {
select {
case <-g.qc:
return
case msg, ok = <-g.msg:
if !ok {
g.Close()
return
}
}
// append to the linked list; head marks the first message pending commit
m := g.message()
m.data = msg
if m.object, err = g.New(msg); err != nil {
g.freeMessage(m)
continue
}
g.mu.Lock()
if g.head == nil {
g.head = m
g.last = m
} else {
g.last.next = m
g.last = m
}
g.mu.Unlock()
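// dispatch to the merge goroutine chosen by Split; this send blocks when
// the merge chan is full, which backpressures consumption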
g.mc[g.Split(m.data, m.object)%g.c.Num] <- m
}
}
func (g *Group) mergeproc(mc <-chan *message) {
ticker := time.NewTicker(time.Duration(g.c.Ticker))
defer ticker.Stop()
msgs := make([]interface{}, 0, g.c.Size)
marks := make([]*message, 0, g.c.Size)
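// accumulate messages until the batch reaches Size or the ticker fires,
// then hand the batch to Do and forward the marks for offset commit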
for {
select {
case <-g.qc:
return
case msg := <-mc:
msgs = append(msgs, msg.object)
marks = append(marks, msg)
if len(msgs) < g.c.Size {
continue
}
case <-ticker.C:
}
if len(msgs) > 0 {
g.Do(msgs)
msgs = make([]interface{}, 0, g.c.Size)
}
if len(marks) > 0 {
g.dc <- marks
marks = make([]*message, 0, g.c.Size)
}
}
}
func (g *Group) commitproc() {
commits := make(map[int32]*databus.Message)
for {
select {
case <-g.qc:
return
case done := <-g.dc:
// mark messages done, then commit the latest offset per partition
for _, d := range done {
d.done = true
}
g.mu.Lock()
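// advance head only across the contiguous prefix of done messages so
// offsets are committed in arrival order even though batches finish
// out of order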
for g.head != nil && g.head.done {
cur := g.head
commits[cur.data.Partition] = cur.data
g.head = cur.next
g.freeMessage(cur)
}
g.mu.Unlock()
for k, m := range commits {
m.Commit()
delete(commits, k)
}
}
}
}


@@ -0,0 +1,392 @@
package databusutil
import (
"context"
"encoding/json"
"runtime"
"strconv"
"sync"
"testing"
"time"
"go-common/library/log"
"go-common/library/queue/databus"
"go-common/library/sync/errgroup"
xtime "go-common/library/time"
)
type testMsg struct {
Seq int64 `json:"seq"`
Mid int64 `json:"mid"`
Now int64 `json:"now"`
}
var (
_sendSeqsList = make([][]int64, _groupNum)
_recvSeqsList = make([][]int64, _groupNum)
_sMus = make([]sync.Mutex, _groupNum)
_rMus = make([]sync.Mutex, _groupNum)
_groupNum = 8
_tc = 20
_ts = time.Now().Unix()
_st = _ts - _ts%10 + 1000
_ed = _st + int64(_groupNum*_tc) - 1
_dsPubConf = &databus.Config{
Key: "0PvKGhAqDvsK7zitmS8t",
Secret: "0PvKGhAqDvsK7zitmS8u",
Group: "databus_test_group",
Topic: "databus_test_topic",
Action: "pub",
Name: "databus",
Proto: "tcp",
Addr: "172.16.33.158:6205",
Active: 1,
Idle: 1,
DialTimeout: xtime.Duration(time.Second),
WriteTimeout: xtime.Duration(time.Second),
ReadTimeout: xtime.Duration(time.Second),
IdleTimeout: xtime.Duration(time.Minute),
}
_dsSubConf = &databus.Config{
Key: "0PvKGhAqDvsK7zitmS8t",
Secret: "0PvKGhAqDvsK7zitmS8u",
Group: "databus_test_group",
Topic: "databus_test_topic",
Action: "sub",
Name: "databus",
Proto: "tcp",
Addr: "172.16.33.158:6205",
Active: 1,
Idle: 1,
DialTimeout: xtime.Duration(time.Second),
WriteTimeout: xtime.Duration(time.Second),
ReadTimeout: xtime.Duration(time.Second * 35),
IdleTimeout: xtime.Duration(time.Minute),
}
)
func TestGroup(t *testing.T) {
for i := 0; i < _groupNum; i++ {
_sendSeqsList[i] = make([]int64, 0)
_recvSeqsList[i] = make([]int64, 0)
}
taskCounts := taskCount(_groupNum, _st, _ed)
runtime.GOMAXPROCS(32)
log.Init(&log.Config{
Dir: "/data/log/queue",
})
c := &Config{
Size: 200,
Ticker: xtime.Duration(time.Second),
Num: _groupNum,
Chan: 1024,
}
dsSub := databus.New(_dsSubConf)
defer dsSub.Close()
group := NewGroup(
c,
dsSub.Messages(),
)
group.New = newTestMsg
group.Split = split
group.Do = do
eg, _ := errgroup.WithContext(context.Background())
// go produce test messages
eg.Go(func() error {
send(_st, _ed)
return nil
})
// go consume test messages
eg.Go(func() error {
group.Start()
defer group.Close()
m := make(map[int]struct{})
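// wait until every proc has processed all of its messages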
for len(m) < _groupNum {
for i := 0; i < _groupNum; i++ {
_, ok := m[i]
if ok {
continue
}
_rMus[i].Lock()
if len(_recvSeqsList[i]) == taskCounts[i] {
m[i] = struct{}{}
}
_rMus[i].Unlock()
log.Info("_recvSeqsList[%d] length: %d, expect: %d", i, len(_recvSeqsList[i]), taskCounts[i])
}
log.Info("m length: %d", len(m))
time.Sleep(time.Millisecond * 500)
}
// check seq lists; sendSeqsList and recvSeqsList will not change from now on, so no locking is needed
for num := 0; num < _groupNum; num++ {
sendSeqs := _sendSeqsList[num]
recvSeqs := _recvSeqsList[num]
if len(sendSeqs) != taskCounts[num] {
t.Errorf("sendSeqs length of proc %d is incorrect, expcted %d but got %d", num, taskCounts[num], len(sendSeqs))
t.FailNow()
}
if len(recvSeqs) != taskCounts[num] {
t.Errorf("recvSeqs length of proc %d is incorrect, expcted %d but got %d", num, taskCounts[num], len(recvSeqs))
t.FailNow()
}
for i := range recvSeqs {
if recvSeqs[i] != sendSeqs[i] {
t.Errorf("res is incorrect for proc %d, expcted recvSeqs[%d] equal to sendSeqs[%d] but not, recvSeqs[%d]: %d, sendSeqs[%d]: %d", num, i, i, i, recvSeqs[i], i, sendSeqs[i])
t.FailNow()
}
}
t.Logf("proc %d processed %d messages, expected %d messages, check ok", num, taskCounts[num], len(recvSeqs))
}
return nil
})
eg.Wait()
}
func do(msgs []interface{}) {
for _, m := range msgs {
if msg, ok := m.(*testMsg); ok {
shard := int(msg.Mid) % _groupNum
if msg.Seq < _st {
log.Info("proc %d processed old seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
continue
}
_rMus[shard].Lock()
_recvSeqsList[shard] = append(_recvSeqsList[shard], msg.Seq)
_rMus[shard].Unlock()
log.Info("proc %d processed seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
}
}
}
func send(st, ed int64) error {
dsPub := databus.New(_dsPubConf)
defer dsPub.Close()
ts := time.Now().Unix()
for i := st; i <= ed; i++ {
mid := int64(i)
seq := i
k := _dsPubConf.Topic + strconv.FormatInt(mid, 10)
n := &testMsg{
Seq: seq,
Mid: mid,
Now: ts,
}
dsPub.Send(context.TODO(), k, n)
// NOTE: sleep here to avoid messages arriving out of order due to network latency
time.Sleep(time.Millisecond * 500)
shard := int(mid) % _groupNum
_sMus[shard].Lock()
_sendSeqsList[shard] = append(_sendSeqsList[shard], seq)
_sMus[shard].Unlock()
}
return nil
}
func newTestMsg(msg *databus.Message) (res interface{}, err error) {
res = new(testMsg)
if err = json.Unmarshal(msg.Value, &res); err != nil {
log.Error("json.Unmarshal(%s) error(%v)", msg.Value, err)
}
return
}
func split(msg *databus.Message, data interface{}) int {
t, ok := data.(*testMsg)
if !ok {
return 0
}
return int(t.Mid)
}
func taskCount(num int, st, ed int64) []int {
res := make([]int, num)
for i := st; i <= ed; i++ {
res[int(i)%num]++
}
return res
}
func TestTaskCount(t *testing.T) {
groupNum := 10
c := 100
ts := time.Now().Unix()
st := ts - ts%10 + 1000
ed := st + int64(groupNum*c) - 1
res := taskCount(groupNum, st, ed)
for i, v := range res {
if v != c {
t.Errorf("res is incorrect, expected task count 10 for proc %d but got %d", i, v)
t.FailNow()
}
t.Logf("i: %d, v: %d", i, v)
}
}
var (
_bGroupNum = 3
_bSendSeqsList = make([][]int64, _bGroupNum)
_bRecvSeqsList = make([][]int64, _bGroupNum)
_bSMus = make([]sync.Mutex, _bGroupNum)
_bRMus = make([]sync.Mutex, _bGroupNum)
_bTc = 20
_bTs = time.Now().Unix()
_bSt = _bTs - _bTs%10 + 1000
_bEd = _bSt + int64(_bGroupNum*_bTc) - 1
_bTaskCounts = taskCount(_bGroupNum, _bSt, _bEd)
_blockDo = true
_blockDoMu sync.Mutex
_blocked = false
)
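// TestGroup_Blocking checks the backpressure path: while the Do callback
// blocks, the merge and done chans fill up until consumeproc itself blocks,
// and consumption resumes once Do is unblocked.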
func TestGroup_Blocking(t *testing.T) {
for i := 0; i < _bGroupNum; i++ {
_bSendSeqsList[i] = make([]int64, 0)
_bRecvSeqsList[i] = make([]int64, 0)
}
runtime.GOMAXPROCS(32)
log.Init(&log.Config{
Dir: "/data/log/queue",
})
c := &Config{
Size: 20,
Ticker: xtime.Duration(time.Second),
Num: _bGroupNum,
Chan: 5,
}
dsSub := databus.New(_dsSubConf)
defer dsSub.Close()
g := NewGroup(
c,
dsSub.Messages(),
)
g.New = newTestMsg
g.Split = split
g.Do = func(msgs []interface{}) {
blockingDo(t, g, msgs)
}
eg, _ := errgroup.WithContext(context.Background())
// go produce test messages
eg.Go(func() error {
dsPub := databus.New(_dsPubConf)
defer dsPub.Close()
ts := time.Now().Unix()
for i := _bSt; i <= _bEd; i++ {
mid := int64(i)
seq := i
k := _dsPubConf.Topic + strconv.FormatInt(mid, 10)
n := &testMsg{
Seq: seq,
Mid: mid,
Now: ts,
}
dsPub.Send(context.TODO(), k, n)
// NOTE: sleep here to avoid messages arriving out of order due to network latency
time.Sleep(time.Millisecond * 500)
shard := int(mid) % _bGroupNum
_bSMus[shard].Lock()
_bSendSeqsList[shard] = append(_bSendSeqsList[shard], seq)
_bSMus[shard].Unlock()
}
return nil
})
// go consume test messages
eg.Go(func() error {
g.Start()
defer g.Close()
m := make(map[int]struct{})
// wait until all procs have processed their messages
for len(m) < _bGroupNum {
for i := 0; i < _bGroupNum; i++ {
_, ok := m[i]
if ok {
continue
}
_bRMus[i].Lock()
if len(_bRecvSeqsList[i]) == _bTaskCounts[i] {
m[i] = struct{}{}
}
_bRMus[i].Unlock()
log.Info("_bRecvSeqsList[%d] length: %d, expect: %d, blockDo: %t", i, len(_bRecvSeqsList[i]), _bTaskCounts[i], _blockDo)
}
log.Info("m length: %d", len(m))
time.Sleep(time.Millisecond * 500)
}
return nil
})
eg.Wait()
}
func blockingDo(t *testing.T, g *Group, msgs []interface{}) {
_blockDoMu.Lock()
if !_blockDo {
_blockDoMu.Unlock()
processMsg(msgs)
return
}
// block here to check whether consumeproc eventually blocks too
lastGLen := 0
cnt := 0
for i := 0; i < 60; i++ {
// print seq status; no lock since the lists are quiescent while Do blocks
for i, v := range _bRecvSeqsList {
log.Info("_bRecvSeqsList[%d] length: %d, expect: %d", i, len(v), _bTaskCounts[i])
}
gLen := 0
for h := g.head; h != nil; h = h.next {
gLen++
}
if gLen == lastGLen {
cnt++
} else {
cnt = 0
}
lastGLen = gLen
log.Info("blocking test: gLen: %d, cnt: %d, _bSt: %d, _bEd: %d", gLen, cnt, _bSt, _bEd)
if cnt == 5 {
_blocked = true
log.Info("blocking test: consumeproc now is blocked, now trying to unblocking do callback")
break
}
time.Sleep(time.Millisecond * 500)
}
// assert blocked
if !_blocked {
t.Errorf("res is incorrect, _blocked should be true but got false")
t.FailNow()
}
// unblock and check whether consumeproc unblocks too
_blockDo = false
_blockDoMu.Unlock()
processMsg(msgs)
}
func processMsg(msgs []interface{}) {
for _, m := range msgs {
if msg, ok := m.(*testMsg); ok {
shard := int(msg.Mid) % _bGroupNum
if msg.Seq < _bSt {
log.Info("proc %d processed old seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
continue
}
_bRMus[shard].Lock()
_bRecvSeqsList[shard] = append(_bRecvSeqsList[shard], msg.Seq)
log.Info("appended: %d", msg.Seq)
_bRMus[shard].Unlock()
log.Info("proc %d processed seq: %d, mid: %d", shard, msg.Seq, msg.Mid)
}
}
}