Create & Init Project...

This commit is contained in:
2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
# Source filegroups for the riot-search service root package.
# "package-srcs" globs every file in this package and is private;
# "all-srcs" is public and adds the "all-srcs" group of each sub-package.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/main/riot-search/benchmark:all-srcs",
"//app/service/main/riot-search/cmd:all-srcs",
"//app/service/main/riot-search/conf:all-srcs",
"//app/service/main/riot-search/dao:all-srcs",
"//app/service/main/riot-search/model:all-srcs",
"//app/service/main/riot-search/server/http:all-srcs",
"//app/service/main/riot-search/service:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,13 @@
### Riot-Search
### Version 1.0.3
> 1.unique return ids
### Version 1.0.2
> 1.ut
### Version 1.0.1
> 1.bug fix
### Version 1.0.0
> 1.init project

View File

@@ -0,0 +1,13 @@
# Owner
guanhuaxin
caoguoliang
yangjiankun
# Author
yangjiankun
daizhichen
# Reviewer
guanhuanxin
caoguoliang
yangjiankun

View File

@@ -0,0 +1,18 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- caoguoliang
- daizhichen
- guanhuaxin
- yangjiankun
labels:
- main
- service
- service/main/riot-search
options:
no_parent_owners: true
reviewers:
- caoguoliang
- daizhichen
- guanhuanxin
- yangjiankun

View File

@@ -0,0 +1,16 @@
# riot-search-service
# 项目简介
1. riot搜索引擎是一个全内存的搜索引擎。riot(搜索引擎)只存储业务唯一标识(如稿件的aid),以及需要进行搜索的字段(如稿件的标题)。
2. 给定搜索范围(业务唯一标识的集合,如aid的集合;此字段为可选项,不给定搜索范围则全量搜索),以及搜索的关键字,返回搜索的结果(搜索命中的关键字,以及搜索的结果)。
3.提供分页,按搜索相关性排序等
# 编译环境
go 1.10
# 依赖包
github.com/go-ego/riot
# 编译执行
go build main.go

View File

@@ -0,0 +1 @@
# HTTP API文档

View File

@@ -0,0 +1,42 @@
# Bazel targets for the riot-search benchmark tool: the "benchmark" binary,
# the Go library it embeds (built from benchmark.go), and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "benchmark",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["benchmark.go"],
importpath = "go-common/app/service/main/riot-search/benchmark",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/riot-search/model:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/ivpusic/grpool:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,141 @@
package main
import (
"context"
"flag"
"math/rand"
"net/url"
"strconv"
"time"
"go-common/app/service/main/riot-search/model"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
xtime "go-common/library/time"
"github.com/ivpusic/grpool"
)
// Benchmark settings and state shared by init, main and the workers.
var (
// minID and maxID bound the half-open ID range handed to generateRandomNumber in init.
minID uint64 = 1
maxID uint64 = 28731894
// keyword is the pool of search terms; init picks one at random for each request template.
keyword = []string{"世界", "鬼畜", "自制", "搬运", "动漫", "崩坏", "搞笑", "德国", "弹幕", "乱入", "吸血鬼", "可怕", "骑士", "团长", "守护"}
// times is the number of requests each worker sends (flag -times).
times int
// count is the number of IDs generated for each request template (flag -count).
count int
// thread is the number of concurrent workers (flag -thread).
thread int
// client is the HTTP client built in init and shared by all workers.
client *bm.Client
// args holds the request templates generated in init.
args []*model.RiotSearchReq
// uri is the endpoint under test (flag -uri).
uri string
// maxElapsedTime and avgElapsedTime hold one latency figure per worker,
// in milliseconds, indexed by the worker number.
maxElapsedTime []int64
avgElapsedTime []int64
)
// generateRandomNumber returns count distinct pseudo-random values drawn from
// the half-open range [start, end).
//
// It returns nil when the range is inverted or contains fewer than count
// values; a negative count also ends up here because it converts to a very
// large uint64. A count of zero yields an empty, non-nil slice.
//
// Already-drawn values are tracked in a set, so each candidate is checked in
// constant time instead of rescanning the whole result slice.
func generateRandomNumber(start uint64, end uint64, count int) []uint64 {
	if end < start || (end-start) < uint64(count) {
		return nil
	}
	span := int(end - start)
	nums := make([]uint64, 0, count)
	seen := make(map[uint64]struct{}, count)
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for len(nums) < count {
		num := uint64(r.Intn(span)) + start
		if _, dup := seen[num]; dup {
			continue
		}
		seen[num] = struct{}{}
		nums = append(nums, num)
	}
	return nums
}
// benchmarkSearch sends `times` search requests from a single worker and
// stores that worker's average and maximum latency, in milliseconds, in
// avgElapsedTime[gid] and maxElapsedTime[gid].
//
// Every request uses one of the templates in args, chosen at random. The
// count parameter is not read by the body; it is kept so the call site stays
// valid. The function panics on the first failed request.
func benchmarkSearch(count int, times int, gid int) {
	if times <= 0 {
		// Nothing to measure, and the average below would divide by zero.
		return
	}
	var totalTime, maxTime int64
	for i := 0; i < times; i++ {
		// Randomly choose which template to send.
		arg := args[rand.Intn(len(args))]

		// Render the IDs as a comma-separated list. Appending into one byte
		// buffer avoids re-copying the growing string for every ID.
		buf := make([]byte, 0, 9*len(arg.IDs))
		for j, id := range arg.IDs {
			if j > 0 {
				buf = append(buf, ',')
			}
			buf = strconv.AppendUint(buf, id, 10)
		}

		params := url.Values{}
		// NOTE(review): model.RiotSearchReq binds its ID list from the form key
		// "ids", while this request sends "aids" - confirm which key the
		// endpoint under test expects.
		params.Set("aids", string(buf))
		params.Set("keyword", arg.Keyword)
		params.Set("pn", "1")
		params.Set("ps", "20")

		start := time.Now()
		if err := client.Post(context.TODO(), uri, "", params, nil); err != nil {
			panic(err)
		}
		elapsed := int64(time.Since(start))
		if elapsed > maxTime {
			maxTime = elapsed
		}
		totalTime += elapsed
	}
	// Durations are nanoseconds; divide by 1e6 to report milliseconds.
	avgElapsedTime[gid] = totalTime / (1000 * 1000 * int64(times))
	maxElapsedTime[gid] = maxTime / (1000 * 1000)
}
// init parses the command-line flags, builds the shared HTTP client, and
// prepares ten request templates together with the per-worker result slices.
func init() {
	flag.IntVar(&times, "times", 100, "单个线程测试次数")
	flag.IntVar(&count, "count", 100000, "每次测试aid个数")
	flag.IntVar(&thread, "thread", 10, "线程数")
	flag.StringVar(&uri, "uri", "http://127.0.0.1:7871/x/internal/riot-search/arc/ids", "请求url")
	flag.Parse()
	log.Info("times: %d, count:%d, thread:%d, uri:%s", times, count, thread, uri)

	log.Info("init http client")
	client = bm.NewClient(&bm.ClientConfig{
		App: &bm.App{
			Key:    "test",
			Secret: "test",
		},
		Timeout:   xtime.Duration(time.Second * 1),
		Dial:      xtime.Duration(time.Second),
		KeepAlive: xtime.Duration(time.Second * 60),
	})

	log.Info("init 10 http request params, random chose one to test")
	args = make([]*model.RiotSearchReq, 10)
	rand.Seed(time.Now().UnixNano())
	for slot := range args {
		args[slot] = &model.RiotSearchReq{
			IDs:     generateRandomNumber(minID, maxID, count),
			Keyword: keyword[rand.Intn(len(keyword))],
		}
	}

	// One result slot per worker.
	maxElapsedTime = make([]int64, thread)
	avgElapsedTime = make([]int64, thread)
	log.Info("init params finished")
}
// main queues one benchmarkSearch job per worker on a goroutine pool, waits
// for all of them, and logs the per-worker average and maximum latencies.
// It panics when the -thread flag is 1000 or more.
func main() {
	log.Info("start test")
	if thread >= 1000 {
		panic("thread large than 1000 is not allowed")
	}

	pool := grpool.NewPool(thread, 10240)
	defer pool.Release()

	pool.WaitCount(thread)
	for gid := 0; gid < thread; gid++ {
		worker := gid // per-iteration copy captured by the job closure
		pool.JobQueue <- func() {
			benchmarkSearch(count, times, worker)
			pool.JobDone()
		}
	}
	pool.WaitAll()

	log.Info("avg elapsed times list: %v", avgElapsedTime)
	log.Info("max elapsed times list: %v", maxElapsedTime)
	log.Info("test finished")
}

View File

@@ -0,0 +1,45 @@
# Bazel targets for the riot-search server entry point: the "cmd" binary, the
# Go library it embeds (main.go, with test.toml and ut.toml as data files),
# and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "cmd",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = [
"test.toml",
"ut.toml",
],
importpath = "go-common/app/service/main/riot-search/cmd",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/riot-search/conf:go_default_library",
"//app/service/main/riot-search/server/http:go_default_library",
"//library/log:go_default_library",
"//library/net/trace:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,41 @@
package main
import (
"flag"
"os"
"os/signal"
"syscall"
"go-common/app/service/main/riot-search/conf"
"go-common/app/service/main/riot-search/server/http"
"go-common/library/log"
"go-common/library/net/trace"
)
// main loads the configuration, initialises logging and tracing, starts the
// HTTP server, and then blocks until a terminating signal arrives.
// SIGHUP is logged and ignored; any other received signal ends the process.
func main() {
	flag.Parse()
	if err := conf.Init(); err != nil {
		panic(err)
	}

	log.Init(conf.Conf.Log)
	defer log.Close()
	log.Info("start")

	trace.Init(conf.Conf.Tracer)
	defer trace.Close()

	// ecode.Init(conf.Conf.Ecode)
	http.Init(conf.Conf)

	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT)
	for {
		sig := <-sigCh
		log.Info("get a signal %s", sig.String())
		if sig == syscall.SIGHUP {
			// Keep running and wait for the next signal.
			continue
		}
		if sig == syscall.SIGQUIT || sig == syscall.SIGTERM || sig == syscall.SIGINT {
			log.Info("exit")
		}
		return
	}
}

View File

@@ -0,0 +1,59 @@
[log]
dir = "/data/log/riot-search"
[Riot]
Dict = "/data/static/riot-search/dictionary.txt"
StopToken = "/data/static/riot-search/stop_tokens.txt"
NumShards = 4
Timeout = 800
FlushTime = 10
[BM]
addr = "0.0.0.0:7871"
timeout = "1s"
[HTTPClient]
key = "test"
secret = "test"
dial = "1s"
timeout = "1s"
keepAlive = "60s"
timer = 1000
[httpClient.breaker]
window = "10s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100
[Databus]
key = "2511663d546f1413"
secret = "cde3b480836cc76df3d635470f991caa"
group = "ArchiveNotify-MainSearch-S"
topic = "ArchiveNotify-T"
action ="sub"
name = "riot"
proto = "tcp"
addr = "172.18.33.50:6205"
idle = 1
active = 1
dialTimeout = "1s"
readTimeout = "40s"
writeTimeout = "1s"
idleTimeout = "60s"
[Mysql]
addr = "172.22.34.101:3306"
dsn = "bili_search:BzwF6Ez64RT6Yy2alSKDFGCPgGX5tMlj@tcp(172.22.34.101:3306)/bilibili_archive?timeout=5s&readTimeout=5s&writeTimeout=5s&parseTime=true&loc=Local&charset=utf8"
active = 5
idle = 2
idleTimeout ="4h"
queryTimeout = "1s"
execTimeout = "1s"
tranTimeout = "1s"
[mysql.breaker]
window = "3s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100

View File

@@ -0,0 +1,24 @@
UT = true
[Riot]
Dict = "/data/static/riot-search/dictionary.txt"
StopToken = "/data/static/riot-search/stop_tokens.txt"
NumShards = 4
Timeout = 800
FlushTime = 10
[Mysql]
addr = "172.22.34.101:3306"
dsn = "test_3306:UJPZaGKjpb2ylFx3HNhmLuwOYft4MCAi@tcp(172.22.34.101:3306)/bilibili_archive?timeout=5s&readTimeout=5s&writeTimeout=5s&parseTime=true&loc=Local&charset=utf8"
active = 5
idle = 2
idleTimeout ="4h"
queryTimeout = "1s"
execTimeout = "1s"
tranTimeout = "1s"
[mysql.breaker]
window = "3s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100

View File

@@ -0,0 +1,39 @@
# Bazel targets for the riot-search configuration package: the Go library
# built from conf.go and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["conf.go"],
importpath = "go-common/app/service/main/riot-search/conf",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/conf:go_default_library",
"//library/database/sql:go_default_library",
"//library/ecode/tip:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
"//library/net/trace:go_default_library",
"//library/queue/databus:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,108 @@
package conf
import (
"errors"
"flag"
"go-common/library/conf"
"go-common/library/database/sql"
ecode "go-common/library/ecode/tip"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/middleware/verify"
"go-common/library/net/trace"
"go-common/library/queue/databus"
"github.com/BurntSushi/toml"
)
// Package-level configuration state.
var (
// confPath is the local config file path taken from the -conf flag;
// when it is empty Init loads from the config center instead.
confPath string
// client is the config-center client created by remote.
client *conf.Client
// Conf is the active configuration; load overwrites the pointed-to value on reload.
Conf = &Config{}
)
// Config is the top-level service configuration decoded from TOML.
type Config struct {
// Log is the logger configuration.
Log *log.Config
// BM is the blademaster HTTP server configuration.
BM *bm.ServerConfig
// Verify configures the request-verification middleware.
Verify *verify.Config
// Tracer is the tracing configuration.
Tracer *trace.Config
// Ecode is the error-code configuration.
Ecode *ecode.Config
// Riot holds the search-engine settings.
Riot *RiotConfig
// HTTPClient is the blademaster HTTP client configuration.
HTTPClient *bm.ClientConfig
// Databus is the message-queue subscription configuration.
Databus *databus.Config
// Mysql is the database configuration.
Mysql *sql.Config
// UT marks unit-test mode.
UT bool
}
// RiotConfig holds the settings of the riot search engine.
type RiotConfig struct {
// Dict is the path of the segmentation dictionary file.
Dict string
// StopToken is the path of the stop-token file.
StopToken string
// LoadPath is the path of a CSV file to load at start-up; empty skips the load.
LoadPath string
// Timeout is the search timeout in milliseconds.
Timeout int
// FlushTime is the interval, in seconds, at which the index is flushed.
FlushTime int64
// NumShards is the number of indexer shards.
NumShards int
// StoreShards is the number of persistence files; try to keep each file under 100M.
StoreShards int
// StoreFolder is the folder for persisted data.
StoreFolder string
// StoreEngine is the persistent storage engine; options: bg (badger), leveldb, bolt.
StoreEngine string
}
// init registers the -conf flag, which selects a local config file.
func init() {
	flag.StringVar(&confPath, "conf", "", "default config path")
}
// Init loads the configuration into Conf: from the config center when no
// -conf path was given, otherwise from the local file at that path.
func Init() error {
	if confPath == "" {
		return remote()
	}
	return local()
}
// local decodes the TOML file at confPath into Conf.
func local() (err error) {
	_, err = toml.DecodeFile(confPath, &Conf)
	return err
}
// remote creates the config-center client, performs the initial load, and
// starts a goroutine that reloads the configuration on every client event.
// It returns the error of client creation or of the initial load.
func remote() (err error) {
	if client, err = conf.New(); err != nil {
		return
	}
	if err = load(); err != nil {
		return
	}
	go func() {
		for range client.Event() {
			log.Info("config reload")
			// Keep the reload error in its own variable: the enclosing named
			// result has already been returned (as nil) and must not be
			// read from this goroutine.
			if rerr := load(); rerr != nil {
				log.Error("config reload error (%v)", rerr)
			}
		}
	}()
	return
}
func load() (err error) {
var (
s string
ok bool
tmpConf *Config
)
if s, ok = client.Toml2(); !ok {
return errors.New("load config center error")
}
if _, err = toml.Decode(s, &tmpConf); err != nil {
return errors.New("could not decode config")
}
*Conf = *tmpConf
return
}

View File

@@ -0,0 +1,58 @@
# Bazel targets for the riot-search dao package: the Go library (dao.go,
# indexer.go, searcher.go), its test target, and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"dao.go",
"indexer.go",
"searcher.go",
],
importpath = "go-common/app/service/main/riot-search/dao",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/riot-search/conf:go_default_library",
"//app/service/main/riot-search/model:go_default_library",
"//library/database/sql:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/go-ego/riot:go_default_library",
"//vendor/github.com/go-ego/riot/types:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = [
"dao_test.go",
"indexer_test.go",
"searcher_test.go",
],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
deps = [
"//app/service/main/riot-search/conf:go_default_library",
"//app/service/main/riot-search/model:go_default_library",
"//vendor/github.com/smartystreets/goconvey/convey:go_default_library",
],
)

View File

@@ -0,0 +1,99 @@
package dao
import (
"context"
"fmt"
"strings"
"time"
"go-common/app/service/main/riot-search/conf"
"go-common/app/service/main/riot-search/model"
"go-common/library/database/sql"
"go-common/library/log"
"github.com/go-ego/riot"
"github.com/go-ego/riot/types"
)
// _selIncrement selects id and title of archives whose mtime lies in the
// window (first argument, second argument]. Original note: incremental data
// that has passed review.
var _selIncrement = "SELECT id, title from archive where mtime>? and mtime<=?"
// Dao bundles the search engine and the database handle used by the service.
type Dao struct {
// c is the service configuration.
c *conf.Config
// searcher is the riot engine that holds the index.
searcher *riot.Engine
// db is the MySQL handle.
db *sql.DB
}
// New validates the riot settings, opens the MySQL handle, and initialises
// the search engine. In unit-test mode (c.UT) the engine is initialised with
// zero-value options; otherwise the dictionary, stop-token file, shard count
// and indexer options come from the configuration.
func New(c *conf.Config) (dao *Dao) {
	validateConfig(c.Riot)

	var opts types.EngineOpts
	if !c.UT {
		opts = types.EngineOpts{
			GseDict:       c.Riot.Dict,
			StopTokenFile: c.Riot.StopToken,
			NumShards:     c.Riot.NumShards,
			IndexerOpts: &types.IndexerOpts{
				IndexType:    types.FrequenciesIndex,
				DocCacheSize: 5000,
			},
		}
	}

	dao = &Dao{
		c:        c,
		searcher: &riot.Engine{},
		db:       sql.NewMySQL(c.Mysql),
	}
	dao.searcher.Init(opts)
	return
}
// validateConfig panics unless both the dictionary and stop-token paths are
// set and the flush interval is positive.
func validateConfig(conf *conf.RiotConfig) {
	switch {
	case conf.Dict == "" || conf.StopToken == "":
		panic("must provide a dict and stop_token file")
	case conf.FlushTime <= 0:
		panic("flush time must larger than 0")
	}
}
// Close closes the database handle.
func (d *Dao) Close() {
	d.db.Close()
}
// Ping pings the database and returns its result.
func (d *Dao) Ping(c context.Context) error {
	err := d.db.Ping(c)
	return err
}
// IncrementBackup returns the archives whose mtime lies in (stime, etime] and
// whose state is one of model.PubStates.LegalStates, ordered by id.
// On a scan error the documents read so far are returned with the error.
func (d *Dao) IncrementBackup(c context.Context, stime, etime time.Time) (docs []*model.Document, err error) {
	// Render the legal states as the comma-separated body of the IN clause.
	inList := make([]string, 0, len(model.PubStates.LegalStates))
	for state := range model.PubStates.LegalStates {
		inList = append(inList, fmt.Sprint(state))
	}
	query := _selIncrement + " and state in (" + strings.Join(inList, ",") + ")" + " order by id asc"

	rows, err := d.db.Query(c, query, stime, etime)
	log.Info("exec query(%s) args(stime:%v, etime:%v)", query, stime, etime)
	if err != nil {
		return
	}
	defer rows.Close()

	for rows.Next() {
		doc := new(model.Document)
		if err = rows.Scan(&doc.ID, &doc.Content); err != nil {
			return
		}
		docs = append(docs, doc)
	}
	err = rows.Err()
	return
}

View File

@@ -0,0 +1,67 @@
package dao
import (
"context"
"flag"
"os"
"testing"
"time"
"go-common/app/service/main/riot-search/conf"
"github.com/smartystreets/goconvey/convey"
)
// Shared test fixtures.
var (
// d is the Dao under test, created once in TestMain.
d *Dao
)
// TestMain selects the configuration source (config-center flags when
// DEPLOY_ENV is set, otherwise ../cmd/ut.toml), initialises conf, builds the
// shared Dao, and runs the tests.
func TestMain(m *testing.M) {
	if os.Getenv("DEPLOY_ENV") == "" {
		flag.Set("conf", "../cmd/ut.toml")
	} else {
		remoteFlags := [][2]string{
			{"app_id", "main.search.riot-search"},
			{"conf_token", "7cac78a7fdfe78c053879bf4dff0171b"},
			{"tree_id", "55087"},
			{"conf_version", "ut"},
			{"deploy_env", "uat"},
			{"conf_host", "config.bilibili.co"},
			{"conf_path", "/tmp"},
			{"region", "sh"},
			{"zone", "sh001"},
		}
		for _, kv := range remoteFlags {
			flag.Set(kv[0], kv[1])
		}
	}
	flag.Parse()
	if err := conf.Init(); err != nil {
		panic(err)
	}
	d = New(conf.Conf)
	os.Exit(m.Run())
}
// TestIncrementBackup exercises IncrementBackup, Ping and Close on the shared
// Dao, in that order. Close runs last because it closes the database handle.
func TestIncrementBackup(t *testing.T) {
	convey.Convey("IncrementBackup", t, func(cv convey.C) {
		cv.Convey("When everything goes positive", func(cv convey.C) {
			docs, err := d.IncrementBackup(context.Background(), time.Now(), time.Now())
			cv.Convey("Error should be nil", func(cv convey.C) {
				cv.So(err, convey.ShouldBeNil)
				cv.So(docs, convey.ShouldBeNil)
			})
		})
	})

	convey.Convey("Ping", t, func(cv convey.C) {
		cv.Convey("When everything goes positive", func(cv convey.C) {
			pingErr := d.Ping(context.Background())
			cv.Convey("Error should be nil", func(cv convey.C) {
				cv.So(pingErr, convey.ShouldBeNil)
			})
		})
	})

	convey.Convey("Close", t, func(cv convey.C) {
		cv.Convey("When everything goes positive", func(cv convey.C) {
			d.Close()
			cv.Convey("No return values", func(cv convey.C) {
			})
		})
	})
}

View File

@@ -0,0 +1,20 @@
package dao
import (
"github.com/go-ego/riot/types"
)
// Insert hands a document with the given id and content to the engine's
// indexer, passing forceUpdate through unchanged.
func (d *Dao) Insert(id uint64, content string, forceUpdate bool) {
	doc := types.DocData{Content: content}
	d.searcher.Index(id, doc, forceUpdate)
}
// Flush calls the engine's Flush, pushing cached documents into the index.
func (d *Dao) Flush() {
	d.searcher.Flush()
}
// Remove asks the engine to drop the document with the given id, passing
// forceUpdate through unchanged.
func (d *Dao) Remove(id uint64, forceUpdate bool) {
	d.searcher.RemoveDoc(id, forceUpdate)
}

View File

@@ -0,0 +1,46 @@
package dao
import (
"testing"
"github.com/smartystreets/goconvey/convey"
)
// TestDaoInsert calls Insert with a zero id, empty content and forceUpdate
// false. It makes no assertions beyond the call completing.
func TestDaoInsert(t *testing.T) {
	convey.Convey("Insert", t, func(cv convey.C) {
		var (
			id          uint64
			content     string
			forceUpdate bool
		)
		cv.Convey("When everything goes positive", func(cv convey.C) {
			d.Insert(id, content, forceUpdate)
			cv.Convey("No return values", func(cv convey.C) {
			})
		})
	})
}
// TestDaoFlush calls Flush on the shared Dao. It makes no assertions beyond
// the call completing.
func TestDaoFlush(t *testing.T) {
	convey.Convey("Flush", t, func(cv convey.C) {
		cv.Convey("When everything goes positive", func(cv convey.C) {
			d.Flush()
			cv.Convey("No return values", func(cv convey.C) {
			})
		})
	})
}
// TestDaoRemove calls Remove with a zero id and forceUpdate false. It makes
// no assertions beyond the call completing.
func TestDaoRemove(t *testing.T) {
	convey.Convey("Remove", t, func(cv convey.C) {
		var (
			id          uint64
			forceUpdate bool
		)
		cv.Convey("When everything goes positive", func(cv convey.C) {
			d.Remove(id, forceUpdate)
			cv.Convey("No return values", func(cv convey.C) {
			})
		})
	})
}

View File

@@ -0,0 +1,108 @@
package dao
import (
"go-common/app/service/main/riot-search/model"
"github.com/go-ego/riot/types"
)
// SearchIDOnly runs a keyword search and returns the de-duplicated IDs of the
// matching documents, the tokens reported by the engine, and paging info.
// It returns nil when the keyword is empty. Page.Total is the number of
// unique IDs in this response.
func (d *Dao) SearchIDOnly(arg *model.RiotSearchReq) *model.IDsResp {
	if arg.Keyword == "" {
		return nil
	}

	// The scope map stays nil when the request carries no IDs.
	var scope map[uint64]bool
	if n := len(arg.IDs); n != 0 {
		scope = make(map[uint64]bool, n)
		for _, id := range arg.IDs {
			scope[id] = true
		}
	}

	rank := &types.RankOpts{
		// Index of the first result to output.
		OutputOffset: (arg.Pn - 1) * arg.Ps,
		// Maximum number of results to output; 0 means no limit.
		MaxOutputs: arg.Ps,
	}
	output := d.searcher.Search(types.SearchReq{
		Text:     arg.Keyword,
		DocIds:   scope,
		Timeout:  d.c.Riot.Timeout,
		RankOpts: rank,
	})

	hits := output.Docs.(types.ScoredDocs)
	ids := make([]uint64, len(hits))
	for i, hit := range hits {
		ids[i] = hit.DocId
	}
	tokens := make([]string, len(output.Tokens))
	copy(tokens, output.Tokens)

	res := &model.IDsResp{
		Tokens: tokens,
		Page: &model.Page{
			PageNum:  arg.Pn,
			PageSize: arg.Ps,
		},
	}
	res.IDs, res.Page.Total = uniqueIDs(ids)
	return res
}
// uniqueIDs returns the distinct values of IDs in order of first appearance,
// together with how many there are. For empty input it returns a nil slice
// and zero.
func uniqueIDs(IDs []uint64) (uIDs []uint64, length int) {
	seen := make(map[uint64]struct{})
	for i := range IDs {
		id := IDs[i]
		if _, dup := seen[id]; dup {
			continue
		}
		seen[id] = struct{}{}
		uIDs = append(uIDs, id)
	}
	return uIDs, len(uIDs)
}
// Search runs a keyword search and returns the matching documents (ID and
// content), the tokens reported by the engine, and paging info.
// It returns nil when the keyword is empty. Page.Total is the number of
// documents in this response.
func (d *Dao) Search(arg *model.RiotSearchReq) *model.DocumentsResp {
	if arg.Keyword == "" {
		return nil
	}

	// The scope map stays nil when the request carries no IDs.
	var scope map[uint64]bool
	if n := len(arg.IDs); n != 0 {
		scope = make(map[uint64]bool, n)
		for _, id := range arg.IDs {
			scope[id] = true
		}
	}

	rank := &types.RankOpts{
		// Index of the first result to output.
		OutputOffset: (arg.Pn - 1) * arg.Ps,
		// Maximum number of results to output; 0 means no limit.
		MaxOutputs: arg.Ps,
	}
	output := d.searcher.Search(types.SearchReq{
		Text:     arg.Keyword,
		DocIds:   scope,
		Timeout:  d.c.Riot.Timeout,
		RankOpts: rank,
	})

	hits := output.Docs.(types.ScoredDocs)
	documents := make([]model.Document, len(hits))
	for i, hit := range hits {
		documents[i].ID = hit.DocId
		documents[i].Content = hit.Content
	}
	tokens := make([]string, len(output.Tokens))
	copy(tokens, output.Tokens)

	return &model.DocumentsResp{
		Documents: documents,
		Tokens:    tokens,
		Page: &model.Page{
			PageNum:  arg.Pn,
			PageSize: arg.Ps,
			Total:    len(hits),
		},
	}
}
// Has reports whether the engine holds a document with the given id.
func (d *Dao) Has(id uint64) bool {
	found := d.searcher.HasDoc(id)
	return found
}

View File

@@ -0,0 +1,57 @@
package dao
import (
"go-common/app/service/main/riot-search/model"
"testing"
"github.com/smartystreets/goconvey/convey"
)
// TestDaoSearchIDOnly checks that SearchIDOnly returns nil for an empty
// request and a non-nil response for a request with a keyword.
func TestDaoSearchIDOnly(t *testing.T) {
	convey.Convey("SearchIDOnly", t, func(cv convey.C) {
		emptyReq := &model.RiotSearchReq{}
		keywordReq := &model.RiotSearchReq{Keyword: "test", IDs: []uint64{1}}
		cv.Convey("When everything goes positive", func(cv convey.C) {
			p1 := d.SearchIDOnly(emptyReq)
			cv.Convey("Then p1 should be nil.", func(cv convey.C) {
				cv.So(p1, convey.ShouldBeNil)
			})
			p2 := d.SearchIDOnly(keywordReq)
			cv.Convey("Then p2 should not be nil.", func(cv convey.C) {
				cv.So(p2, convey.ShouldNotBeNil)
			})
		})
	})
}
// TestDaoSearch checks that Search returns nil for an empty request and a
// non-nil response for a request with a keyword.
func TestDaoSearch(t *testing.T) {
	convey.Convey("Search", t, func(cv convey.C) {
		emptyReq := &model.RiotSearchReq{}
		keywordReq := &model.RiotSearchReq{Keyword: "test", IDs: []uint64{1}}
		cv.Convey("When everything goes positive", func(cv convey.C) {
			p1 := d.Search(emptyReq)
			cv.Convey("Then p1 should be nil.", func(cv convey.C) {
				cv.So(p1, convey.ShouldBeNil)
			})
			p2 := d.Search(keywordReq)
			cv.Convey("Then p2 should not be nil.", func(cv convey.C) {
				cv.So(p2, convey.ShouldNotBeNil)
			})
		})
	})
}
// TestDaoHas checks that Has reports false for document id 1 on the shared
// Dao. The top-level Convey label names the method under test so that the
// report does not show this case as a second "Search" test.
func TestDaoHas(t *testing.T) {
	convey.Convey("Has", t, func(ctx convey.C) {
		ctx.Convey("When everything goes positive", func(ctx convey.C) {
			p1 := d.Has(1)
			ctx.Convey("Then p1 should be false.", func(ctx convey.C) {
				ctx.So(p1, convey.ShouldBeFalse)
			})
		})
	})
}

View File

@@ -0,0 +1,28 @@
# Bazel targets for the riot-search model package: the Go library built from
# model.go and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["model.go"],
importpath = "go-common/app/service/main/riot-search/model",
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,82 @@
package model
// Document is one indexed item: its ID and the text that is searched.
type Document struct {
// ID is the document identifier.
ID uint64 `json:"id"`
// Content is the searchable text.
Content string `json:"content"`
}
// RiotSearchReq carries the parameters of a search request.
type RiotSearchReq struct {
// IDs is bound from the form key "ids" with the split option.
IDs []uint64 `form:"ids,split"`
// Keyword is the search text; it is required.
Keyword string `form:"keyword" validate:"required"`
// Pn is the page number, at least 1.
Pn int `form:"pn" validate:"min=1"`
// Ps is the page size, at least 0.
Ps int `form:"ps" validate:"min=0"`
}
// IDsResp is the response of an ID-only search.
type IDsResp struct {
// IDs lists the matching document IDs.
IDs []uint64 `json:"ids"`
// Tokens lists the tokens reported by the search.
Tokens []string `json:"tokens"`
// Page carries the paging information.
Page *Page `json:"page"`
}
// DocumentsResp is the response of a search that returns full documents.
type DocumentsResp struct {
// Documents lists the matching documents.
// NOTE(review): the JSON key is spelled "ducuments", presumably a typo for
// "documents"; changing it would alter the wire format - confirm with consumers.
Documents []Document `json:"ducuments"`
// Tokens lists the tokens reported by the search.
Tokens []string `json:"tokens"`
// Page carries the paging information.
Page *Page `json:"page"`
}
// Page carries paging information for a search response.
type Page struct {
// PageNum is the page number.
PageNum int `json:"pn"`
// PageSize is the page size.
PageSize int `json:"ps"`
// Total is the result count reported with the response.
Total int `json:"total"`
}
// **********************
// * Model for archives *
// **********************

// ArchiveMessage is a databus change message for a table row.
type ArchiveMessage struct {
// Action is the change type carried by the message.
Action string `json:"action"`
// Table is the name of the table the change belongs to.
Table string `json:"table"`
// New is the row image after the change.
New *ArchiveMeta `json:"new"`
// Old is the row image before the change.
Old *ArchiveMeta `json:"old"`
}
// ArchiveMeta holds the archive fields carried in a databus message.
type ArchiveMeta struct {
// AID is the archive ID.
AID uint64 `json:"aid"`
// Title is the archive title.
Title string `json:"title"`
// State is the archive state code.
State int `json:"state"`
}
// States is a set of archive state codes.
type States struct {
	// LegalStates holds the accepted codes as keys; Legal only checks key
	// presence and ignores the stored value.
	LegalStates map[int]bool
}

// PubStates is the set of archive states treated as published.
var PubStates = &States{
	LegalStates: map[int]bool{
		-40:   true,
		0:     true,
		1:     true,
		1001:  true,
		10000: true,
		15000: true,
		20000: true,
		30000: true,
	},
}

// Legal reports whether state is a key of LegalStates.
func (l *States) Legal(state int) bool {
	_, found := l.LegalStates[state]
	return found
}

View File

@@ -0,0 +1,36 @@
# Bazel targets for the riot-search HTTP server package: the Go library built
# from http.go and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["http.go"],
importpath = "go-common/app/service/main/riot-search/server/http",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/riot-search/conf:go_default_library",
"//app/service/main/riot-search/model:go_default_library",
"//app/service/main/riot-search/service:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,86 @@
package http
import (
"net/http"
"go-common/app/service/main/riot-search/conf"
"go-common/app/service/main/riot-search/model"
"go-common/app/service/main/riot-search/service"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/middleware/verify"
)
// Handler dependencies, set once by Init.
var (
// srv is the service the handlers delegate to.
srv *service.Service
// vfy is the verification middleware applied to the search routes.
vfy *verify.Verify
)
// Init builds the service and the verify middleware, registers the routes on
// a default blademaster server, and starts it. It logs and panics when the
// server fails to start.
func Init(c *conf.Config) {
	srv = service.New(c)
	vfy = verify.New(c.Verify)

	engine := bm.DefaultServer(c.BM)
	router(engine)

	err := engine.Start()
	if err == nil {
		return
	}
	log.Error("xhttp.Serve error(%v)", err)
	panic(err)
}
// router registers the ping and register handlers plus the routes under
// /x/internal/riot-search. The two search routes go through vfy.Verify.
func router(e *bm.Engine) {
	e.Ping(ping)
	e.Register(register)

	group := e.Group("/x/internal/riot-search")
	group.POST("/arc/ids", vfy.Verify, searchIDOnly)
	group.POST("/arc/contents", vfy.Verify, search)
	// debug api
	group.GET("/arc/has", has)
}
// ping answers the health check; when the service ping fails it logs the
// error and aborts the request with 503 Service Unavailable.
func ping(c *bm.Context) {
	err := srv.Ping(c)
	if err == nil {
		return
	}
	log.Error("ping error(%v)", err)
	c.AbortWithStatus(http.StatusServiceUnavailable)
}
// register replies with an empty JSON object.
func register(c *bm.Context) {
	empty := map[string]interface{}{}
	c.JSON(empty, nil)
}
// searchIDOnly binds a RiotSearchReq from the request and replies with the
// matching IDs. When binding fails it logs the request and the bind error
// and returns without calling the service.
//
// @params RiotSearchReq
// @router post /x/internal/riot-search/arc/ids
// @response IDsResp
func searchIDOnly(c *bm.Context) {
	req := new(model.RiotSearchReq)
	if err := c.Bind(req); err != nil {
		// Include the error itself; the partially bound request alone does
		// not say which parameter was rejected.
		log.Error("request param(%v) error(%v)", req, err)
		return
	}
	c.JSON(srv.SearchIDOnly(c, req), nil)
}
// search binds a RiotSearchReq from the request and replies with the
// matching documents. When binding fails it logs the request and the bind
// error and returns without calling the service.
//
// @params RiotSearchReq
// @router post /x/internal/riot-search/arc/contents
// @response DocumentsResp
func search(c *bm.Context) {
	req := new(model.RiotSearchReq)
	if err := c.Bind(req); err != nil {
		// Include the error itself; the partially bound request alone does
		// not say which parameter was rejected.
		log.Error("request param(%v) error(%v)", req, err)
		return
	}
	c.JSON(srv.Search(c, req), nil)
}
// has is the debug handler that reports whether the document whose id is
// given in the "id" form parameter is present in the index.
func has(c *bm.Context) {
	var req struct {
		ID uint64 `form:"id" validate:"min=0"`
	}
	if err := c.Bind(&req); err != nil {
		return
	}
	c.JSON(srv.Has(c, req.ID), nil)
}

View File

@@ -0,0 +1,41 @@
# Bazel targets for the riot-search service package: the Go library (databus.go,
# load.go, search.go, service.go) and the source filegroups.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"databus.go",
"load.go",
"search.go",
"service.go",
],
importpath = "go-common/app/service/main/riot-search/service",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/riot-search/conf:go_default_library",
"//app/service/main/riot-search/dao:go_default_library",
"//app/service/main/riot-search/model:go_default_library",
"//library/log:go_default_library",
"//library/queue/databus:go_default_library",
"//vendor/github.com/ivpusic/grpool:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,65 @@
package service
import (
"encoding/json"
"go-common/app/service/main/riot-search/model"
"go-common/library/log"
)
// watcherproc consumes archive change messages from databus and mirrors them
// into the search index. It runs until the message channel is closed.
//
// Each message is committed before it is processed. Only messages for table
// "archive" that carry a "new" row image are handled:
//   - insert: the title is indexed when the new state is legal.
//   - update: the title is (re)indexed when the new state is legal and either
//     the title changed or the previous state was not legal (the archive was
//     not in the index before); the document is removed when the state went
//     from legal to not legal; every other update is logged and ignored.
//
// Index changes are queued on the worker pool rather than applied inline.
func (s *Service) watcherproc() {
	defer func() {
		log.Error("watcherproc quit")
	}()
	msgs := s.databus.Messages()
	states := model.PubStates
	for {
		msg, ok := <-msgs
		if !ok {
			log.Error("s.event.Messages closed")
			return
		}
		msg.Commit()
		log.Info("key(%s) value(%s) partition(%d) offset(%d) commit", msg.Key, msg.Value, msg.Partition, msg.Offset)

		var m model.ArchiveMessage
		if err := json.Unmarshal(msg.Value, &m); err != nil {
			log.Error("json.Unmarshal(%s) error(%v)", string(msg.Value), err)
			continue
		}
		if m.Table != "archive" {
			continue
		}
		if m.New == nil {
			log.Error("dirty data from databus value(%v)", string(msg.Value))
			continue
		}

		// meta is a per-iteration pointer, safe to capture in the queued jobs.
		meta := m.New
		switch m.Action {
		case "insert":
			if states.Legal(meta.State) {
				s.pool.JobQueue <- func() {
					s.dao.Insert(meta.AID, meta.Title, false)
					log.Info("riot: insert data into index id(%d) content(%s)", meta.AID, meta.Title)
				}
			}
		case "update":
			if m.Old == nil {
				// Print the payload as text, like the other dirty-data log;
				// %v on the raw []byte would print a list of numbers.
				log.Error("dirty data from databus value(%v)", string(msg.Value))
				continue
			}
			newLegal := states.Legal(meta.State)
			oldLegal := states.Legal(m.Old.State)
			switch {
			case newLegal && (meta.Title != m.Old.Title || !oldLegal):
				// A change from a non-legal to a legal state must index the
				// archive even when the title is unchanged; otherwise it
				// would stay out of the index.
				s.pool.JobQueue <- func() {
					s.dao.Insert(meta.AID, meta.Title, false)
					log.Info("riot: update data into index id(%d) content(%s)", meta.AID, meta.Title)
				}
			case !newLegal && oldLegal:
				s.pool.JobQueue <- func() {
					s.dao.Remove(meta.AID, false)
					log.Info("riot: remove data id(%d) state(%d)", meta.AID, meta.State)
				}
			default:
				log.Info("ignore action(%s) value(%s)", m.Action, msg.Value)
			}
		default:
		}
	}
}

View File

@@ -0,0 +1,87 @@
package service
import (
"bufio"
"context"
"encoding/csv"
"io"
"os"
"strconv"
"time"
"go-common/app/service/main/riot-search/model"
"go-common/library/log"
)
// loadproc fills the index at start-up in two phases.
//
// First it reads the CSV file at path (two fields per record: id, title) and
// indexes every record. Malformed records are logged and skipped; any other
// read error stops the CSV phase with whatever was read so far. It panics
// when the file cannot be opened or an id is not an unsigned integer.
//
// Then it indexes the database rows modified during the last 24 hours,
// fetched in twelve consecutive two-hour windows. A database error flushes
// the index and ends the load early.
func (s *Service) loadproc(path string) {
	log.Info("loading csv file %s", path)
	file, err := os.Open(path)
	if err != nil {
		log.Error("load csv file failed error:(%v)", err)
		panic(err)
	}
	defer file.Close()

	reader := csv.NewReader(bufio.NewReader(file))
	reader.FieldsPerRecord = 2
	reader.LazyQuotes = true
	var arc []*model.Document
	for {
		line, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Error("read file error: %v line %v", err, line)
			if _, malformed := err.(*csv.ParseError); malformed {
				// A bad record only affects itself; move on to the next one.
				continue
			}
			// Any other error comes from the underlying file and may be
			// returned again on every call (for example when path is a
			// directory), so retrying could loop forever.
			break
		}
		aid, err := strconv.ParseUint(line[0], 10, 64)
		if err != nil {
			log.Error("illegal line %v", line)
			panic(err)
		}
		arc = append(arc, &model.Document{
			ID:      aid,
			Content: line[1],
		})
	}

	log.Info("adding csv data to search engine...")
	s.indexDocs(arc)
	s.dao.Flush()

	// Load the recent changes from the database.
	stime := time.Now().Add(-24 * time.Hour)
	log.Info("sync increment data (mtime>%v) from database", stime)
	for i := 0; i < 12; i++ {
		etime := stime.Add(2 * time.Hour)
		incrData, err := s.dao.IncrementBackup(context.Background(), stime, etime)
		stime = etime
		if err != nil {
			log.Error("database error:(%v)", err)
			s.dao.Flush()
			return
		}
		s.indexDocs(incrData)
	}
	s.dao.Flush()
	log.Info("finish load data")
}

// indexDocs queues one insert job per document on the worker pool and blocks
// until all of them have run.
func (s *Service) indexDocs(docs []*model.Document) {
	s.pool.WaitCount(len(docs))
	for _, doc := range docs {
		doc := doc // per-iteration copy captured by the job closure
		s.pool.JobQueue <- func() {
			s.dao.Insert(doc.ID, doc.Content, false)
			s.pool.JobDone()
		}
	}
	s.pool.WaitAll()
}

View File

@@ -0,0 +1,24 @@
package service
import (
"context"
"go-common/app/service/main/riot-search/model"
)
// SearchIDOnly returns the dao's ID-only search result for arg. The context
// is accepted but not used.
func (s *Service) SearchIDOnly(c context.Context, arg *model.RiotSearchReq) (res *model.IDsResp) {
	return s.dao.SearchIDOnly(arg)
}
// Search returns the dao's document search result (IDs and content) for arg.
// The context is accepted but not used.
func (s *Service) Search(c context.Context, arg *model.RiotSearchReq) (res *model.DocumentsResp) {
	return s.dao.Search(arg)
}
// Has reports whether the index holds a document with the given id. The
// context is accepted but not used.
func (s *Service) Has(c context.Context, id uint64) bool {
	exists := s.dao.Has(id)
	return exists
}

View File

@@ -0,0 +1,45 @@
package service
import (
"context"
"runtime"
"go-common/app/service/main/riot-search/conf"
"go-common/app/service/main/riot-search/dao"
"go-common/library/queue/databus"
"github.com/ivpusic/grpool"
)
// Service ties together the dao, the databus subscription and the worker pool.
type Service struct {
// c is the service configuration.
c *conf.Config
// dao gives access to the search engine and the database.
dao *dao.Dao
// databus is the subscription that delivers archive change messages.
databus *databus.Databus
// pool runs index jobs.
pool *grpool.Pool
}
// New builds the Service: it creates the dao, the databus subscription and a
// worker pool with one worker per CPU, runs the initial load when
// Riot.LoadPath is set, and starts the databus watcher goroutine.
func New(c *conf.Config) (s *Service) {
	s = new(Service)
	s.c = c
	s.dao = dao.New(c)
	s.databus = databus.New(c.Databus)
	s.pool = grpool.NewPool(runtime.NumCPU(), 10240)

	if loadPath := c.Riot.LoadPath; loadPath != "" {
		s.loadproc(loadPath)
	}
	go s.watcherproc()
	return s
}
// Ping returns the result of the dao's ping.
func (s *Service) Ping(c context.Context) (err error) {
	err = s.dao.Ping(c)
	return
}
// Close closes the dao.
func (s *Service) Close() {
	s.dao.Close()
}

View File

@@ -0,0 +1,220 @@
{
"swagger": "2.0",
"info": {
"title": "go-common api",
"description": "api",
"version": "1.0",
"contact": {
"email": "lintanghui@bilibili.com"
},
"license": {
"name": "Apache 2.0",
"url": "http://www.apache.org/licenses/LICENSE-2.0.html"
}
},
"paths": {
"/x/riot-search/fav/aids": {
"get": {
"operationId": "/x/riot-search/fav/aids",
"parameters": [
{
"in": "query",
"name": "ids",
"description": "数组,按逗号分隔",
"type": "array"
},
{
"in": "query",
"name": "keyword",
"required": true,
"type": "string"
},
{
"in": "query",
"name": "pn",
"description": " 最小值 1",
"type": "integer",
"format": "int64"
},
{
"in": "query",
"name": "ps",
"description": " 最小值 0",
"type": "integer",
"format": "int64"
}
],
"responses": {
"200": {
"description": "服务成功响应内容",
"schema": {
"type": "object",
"properties": {
"code": {
"description": "错误码描述",
"type": "integer"
},
"data": {
"$ref": "#/definitions/IDsResp",
"type": "object"
},
"message": {
"description": "错误码文本描述",
"type": "string"
},
"ttl": {
"description": "客户端限速时间",
"type": "integer",
"format": "int64"
}
}
}
}
}
}
},
"/x/riot-search/fav/contents": {
"get": {
"operationId": "/x/riot-search/fav/contents",
"parameters": [
{
"in": "query",
"name": "pn",
"description": " 最小值 1",
"type": "integer",
"format": "int64"
},
{
"in": "query",
"name": "ps",
"description": " 最小值 0",
"type": "integer",
"format": "int64"
},
{
"in": "query",
"name": "ids",
"description": "数组,按逗号分隔",
"type": "array"
},
{
"in": "query",
"name": "keyword",
"required": true,
"type": "string"
}
],
"responses": {
"200": {
"description": "服务成功响应内容",
"schema": {
"type": "object",
"properties": {
"code": {
"description": "错误码描述",
"type": "integer"
},
"data": {
"$ref": "#/definitions/DocumentsResp",
"type": "object"
},
"message": {
"description": "错误码文本描述",
"type": "string"
},
"ttl": {
"description": "客户端限速时间",
"type": "integer",
"format": "int64"
}
}
}
}
}
}
}
},
"definitions": {
"Document": {
"title": "Document",
"description": "Document id and content",
"type": "object",
"properties": {
"Content": {
"type": "string"
},
"ID": {
"type": "integer",
"format": "int64"
}
}
},
"DocumentsResp": {
"title": "DocumentsResp",
"description": "DocumentsResp resp of documents",
"type": "object",
"properties": {
"Documents": {
"type": "array",
"items": {
"$ref": "#/definitions/Document",
"type": "object"
}
},
"Page": {
"$ref": "#/definitions/Page",
"type": "object"
},
"Tokens": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"IDsResp": {
"title": "IDsResp",
"description": "IDsResp resp of ids",
"type": "object",
"properties": {
"IDs": {
"type": "array",
"items": {
"type": "integer",
"format": "int64"
}
},
"Page": {
"$ref": "#/definitions/Page",
"type": "object"
},
"Tokens": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"Page": {
"title": "Page",
"description": "Page Pager",
"type": "object",
"properties": {
"PageNum": {
"type": "integer",
"format": "int64"
},
"PageSize": {
"type": "integer",
"format": "int64"
},
"Total": {
"type": "integer",
"format": "int64"
}
}
}
}
}