Create & Init Project...

This commit is contained in:
2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
# package-srcs globs every file under this package; it is private and is
# only consumed through :all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs bundles this package's files with the all-srcs target of each
# listed recsys-recall sub-package.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys-recall/api/grpc/v1:all-srcs",
"//app/service/bbq/recsys-recall/cmd:all-srcs",
"//app/service/bbq/recsys-recall/conf:all-srcs",
"//app/service/bbq/recsys-recall/dao:all-srcs",
"//app/service/bbq/recsys-recall/model:all-srcs",
"//app/service/bbq/recsys-recall/server/grpc:all-srcs",
"//app/service/bbq/recsys-recall/server/http:all-srcs",
"//app/service/bbq/recsys-recall/service:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,29 @@
#### Version 1.0.8
> 增加新发视频实时倒排索引
> 正排质量信息增加曝光数
#### Version 1.0.7
> 增加倒排二进制反序列化
> 增加HTTP调试接口
#### Version 1.0.6
> 召回服务合并相同svid
#### Version 1.0.5
> PB格式正排数据定期load
#### Version 1.0.4
> 去重逻辑修改bloomfilter改为历史和每日新增两部分
#### Version 1.0.3
> 召回增加三级本地缓存减少redis压力
#### Version 1.0.2
> 单次请求增加召回源上限
#### Version 1.0.1
> 召回接口增加召回源漏斗信息
> 增加召回正排接口
#### Version 1.0.0
> BBQ推荐召回服务雏形上线

View File

@@ -0,0 +1,9 @@
# Owner
daiwei
liuzhiquan
# Author
# Reviewer
daiwei
liuzhiquan

View File

@@ -0,0 +1,14 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- daiwei
- liuzhiquan
labels:
- bbq
- service
- service/bbq/recsys-recall
options:
no_parent_owners: true
reviewers:
- daiwei
- liuzhiquan

View File

@@ -0,0 +1,12 @@
# recsys-recall-service
# 项目简介
1. bbq推荐召回
# 编译环境
# 依赖包
# 编译执行

View File

@@ -0,0 +1,63 @@
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# v1_proto wraps api.proto; it depends on the recall job's proto package,
# the well-known Empty message and the gogoproto extensions.
proto_library(
name = "v1_proto",
srcs = ["api.proto"],
tags = ["automanaged"],
deps = [
"//app/job/bbq/recall/proto:proto_proto",
"@com_google_protobuf//:empty_proto",
"@gogo_special_proto//github.com/gogo/protobuf/gogoproto",
],
)
# v1_go_proto generates Go code from :v1_proto with the gogofast gRPC compiler.
go_proto_library(
name = "v1_go_proto",
compilers = ["@io_bazel_rules_go//proto:gogofast_grpc"],
importpath = "go-common/app/service/bbq/recsys-recall/api/grpc/v1",
proto = ":v1_proto",
tags = ["automanaged"],
deps = [
"//app/job/bbq/recall/proto:proto_go_proto",
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@io_bazel_rules_go//proto/wkt:empty_go_proto",
],
)
# go_default_library has no sources of its own; it embeds the generated
# :v1_go_proto code and exposes it publicly under the same import path.
go_library(
name = "go_default_library",
srcs = [],
embed = [":v1_go_proto"],
importpath = "go-common/app/service/bbq/recsys-recall/api/grpc/v1",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/proto:go_default_library",
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
"@io_bazel_rules_go//proto/wkt:empty_go_proto",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_x_net//context:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,86 @@
syntax = "proto3";
package bbq.service.recall.v1;
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
import "app/job/bbq/recall/proto/forward_index.proto";
import "google/protobuf/empty.proto";
option go_package = "v1";
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
// RecallInfo describes one entry of RecallRequest.info: a tag and name plus
// the scorer, filter and ranker to apply, a priority and a per-entry limit.
// NOTE(review): exact semantics of each field are defined by the service
// implementation, which is not visible from this file.
message RecallInfo {
string tag = 1 [(gogoproto.jsontag) = "tag",(gogoproto.moretags) = "form:\"tag\"",(gogoproto.customname) = "Tag"];
string name = 2 [(gogoproto.jsontag) = "name",(gogoproto.moretags) = "form:\"name\"",(gogoproto.customname) = "Name"];
string scorer = 3 [(gogoproto.jsontag) = "scorer",(gogoproto.moretags) = "form:\"scorer\"",(gogoproto.customname) = "Scorer"];
string filter = 4 [(gogoproto.jsontag) = "filter",(gogoproto.moretags) = "form:\"filter\"",(gogoproto.customname) = "Filter"];
string ranker = 5 [(gogoproto.jsontag) = "ranker",(gogoproto.moretags) = "form:\"ranker\"",(gogoproto.customname) = "Ranker"];
int32 priority = 6 [(gogoproto.jsontag) = "priority",(gogoproto.moretags) = "form:\"priority\"",(gogoproto.customname) = "Priority"];
int32 limit = 7 [(gogoproto.jsontag) = "limit",(gogoproto.moretags) = "form:\"limit\"",(gogoproto.customname) = "Limit"];
}
// RecallRequest is the input of RecsysRecall.Recall: the requesting user
// (mid and/or buvid), the list of RecallInfo entries and an overall limit.
// Note that field `info` is exposed as "infos" in JSON/form and as Infos in Go.
message RecallRequest {
int64 mid = 1 [(gogoproto.jsontag) = "mid",(gogoproto.moretags) = "form:\"mid\"",(gogoproto.customname) = "MID"];
string buvid = 2 [(gogoproto.jsontag) = "buvid",(gogoproto.moretags) = "form:\"buvid\"",(gogoproto.customname) = "BUVID"];
repeated RecallInfo info = 3 [(gogoproto.jsontag) = "infos",(gogoproto.moretags) = "form:\"infos\"",(gogoproto.customname) = "Infos"];
int32 total_limit = 4 [(gogoproto.jsontag) = "total_limit",(gogoproto.moretags) = "form:\"total_limit\"",(gogoproto.customname) = "TotalLimit"];
}
// InvertedIndex is one element of Video.indexes: an index string, a name
// and a score.
message InvertedIndex {
string index = 1 [(gogoproto.jsontag) = "index",(gogoproto.moretags) = "form:\"index\"",(gogoproto.customname) = "Index"];
string name = 2 [(gogoproto.jsontag) = "name",(gogoproto.moretags) = "form:\"name\"",(gogoproto.customname) = "Name"];
float score = 3 [(gogoproto.jsontag) = "score",(gogoproto.moretags) = "form:\"score\"",(gogoproto.customname) = "Score"];
}
// Video is one element of RecallResponse.list: the video id (svid), its
// score and name, the ForwardIndex record imported from the recall job's
// proto package, and inverted-index information both as a single string
// (inverted_index) and as a list (indexes, serialized as "inverted_indexes").
message Video {
int64 svid = 1 [(gogoproto.jsontag) = "svid",(gogoproto.moretags) = "form:\"svid\"",(gogoproto.customname) = "SVID"];
float score = 2 [(gogoproto.jsontag) = "score",(gogoproto.moretags) = "form:\"score\"",(gogoproto.customname) = "Score"];
string name = 3 [(gogoproto.jsontag) = "name",(gogoproto.moretags) = "form:\"name\"",(gogoproto.customname) = "Name"];
bbq.job.recall.forwardindex.ForwardIndex forward_index = 4 [(gogoproto.jsontag) = "forward_index",(gogoproto.moretags) = "form:\"forward_index\"",(gogoproto.customname) = "ForwardIndex"];
string inverted_index = 5 [(gogoproto.jsontag) = "inverted_index",(gogoproto.moretags) = "form:\"inverted_index\"",(gogoproto.customname) = "InvertedIndex"];
repeated InvertedIndex indexes = 6 [(gogoproto.jsontag) = "inverted_indexes",(gogoproto.moretags) = "form:\"inverted_indexes\"",(gogoproto.customname) = "InvertedIndexes"];
}
// RecallSrc is one element of RecallResponse.src_info: three counters
// (total_hit, filter, final) together with a tag and name.
// NOTE(review): presumably per-source funnel counts (hit -> filtered ->
// returned); confirm against the service implementation.
message RecallSrc {
int32 total_hit = 1 [(gogoproto.jsontag) = "total_hit",(gogoproto.moretags) = "form:\"total_hit\"",(gogoproto.customname) = "TotalHit"];
int32 filter = 2 [(gogoproto.jsontag) = "filter",(gogoproto.moretags) = "form:\"filter\"",(gogoproto.customname) = "Filter"];
int32 final = 3 [(gogoproto.jsontag) = "final",(gogoproto.moretags) = "form:\"final\"",(gogoproto.customname) = "Final"];
string tag = 4 [(gogoproto.jsontag) = "tag",(gogoproto.moretags) = "form:\"tag\"",(gogoproto.customname) = "Tag"];
string name = 5 [(gogoproto.jsontag) = "name",(gogoproto.moretags) = "form:\"name\"",(gogoproto.customname) = "Name"];
}
// RecallResponse is the output of RecsysRecall.Recall: a total count, the
// list of videos, and a list of RecallSrc entries.
message RecallResponse {
int32 total = 1 [(gogoproto.jsontag) = "total",(gogoproto.moretags) = "form:\"total\"",(gogoproto.customname) = "Total"];
repeated Video list = 2 [(gogoproto.jsontag) = "list",(gogoproto.moretags) = "form:\"list\"",(gogoproto.customname) = "List"];
repeated RecallSrc src_info = 3 [(gogoproto.jsontag) = "src_info",(gogoproto.moretags) = "form:\"src_info\"",(gogoproto.customname) = "SrcInfo"];
}
// VideoIndexRequest is the input of RecsysRecall.VideoIndex: the video ids
// to look up.
message VideoIndexRequest {
repeated int64 svids = 1 [(gogoproto.jsontag) = "svids",(gogoproto.moretags) = "form:\"svids\"",(gogoproto.customname) = "SVIDs"];
}
// VideoIndexResponse is the output of RecsysRecall.VideoIndex: a list of
// ForwardIndex records.
message VideoIndexResponse {
repeated bbq.job.recall.forwardindex.ForwardIndex list = 1 [(gogoproto.jsontag) = "list",(gogoproto.moretags) = "form:\"list\"",(gogoproto.customname) = "List"];
}
// NewIncomeVideoRequest is the input of RecsysRecall.NewIncomeVideo: a list
// of video ids together with a key.
// NOTE(review): presumably the inverted-index key the videos are added to;
// confirm against the service implementation.
message NewIncomeVideoRequest {
repeated int64 svids = 1 [(gogoproto.jsontag) = "svids",(gogoproto.moretags) = "form:\"svids\"",(gogoproto.customname) = "SVIDs"];
string key = 2 [(gogoproto.jsontag) = "key",(gogoproto.moretags) = "form:\"key\"",(gogoproto.customname) = "Key"];
}
// VideosByIndexRequest is the input of RecsysRecall.VideosByIndex: the key
// to query.
message VideosByIndexRequest {
string key = 1 [(gogoproto.jsontag) = "key",(gogoproto.moretags) = "form:\"key\"",(gogoproto.customname) = "Key"];
}
// VideosByIndexResponse is the output of RecsysRecall.VideosByIndex: a key
// and a list of video ids.
message VideosByIndexResponse {
string key = 1 [(gogoproto.jsontag) = "key",(gogoproto.moretags) = "form:\"key\"",(gogoproto.customname) = "Key"];
repeated int64 svids = 2 [(gogoproto.jsontag) = "svids",(gogoproto.moretags) = "form:\"svids\"",(gogoproto.customname) = "SVIDs"];
}
// RecsysRecall is the gRPC surface of the recall service. All four methods
// are unary; NewIncomeVideo returns google.protobuf.Empty.
service RecsysRecall {
rpc Recall(RecallRequest) returns(RecallResponse);
rpc VideoIndex(VideoIndexRequest) returns(VideoIndexResponse);
rpc NewIncomeVideo(NewIncomeVideoRequest) returns(.google.protobuf.Empty);
rpc VideosByIndex(VideosByIndexRequest) returns(VideosByIndexResponse);
}

View File

@@ -0,0 +1,49 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
# cmd is the service executable; all of its code comes from the embedded
# :go_default_library.
go_binary(
name = "cmd",
embed = [":go_default_library"],
tags = ["automanaged"],
)
# go_default_library compiles main.go and ships test.toml as a data file.
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = ["test.toml"],
importpath = "go-common/app/service/bbq/recsys-recall/cmd",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/conf:go_default_library",
"//app/service/bbq/recsys-recall/server/grpc:go_default_library",
"//app/service/bbq/recsys-recall/server/http:go_default_library",
"//app/service/bbq/recsys-recall/service:go_default_library",
"//app/service/bbq/recsys-recall/service/index:go_default_library",
"//library/ecode/tip:go_default_library",
"//library/log:go_default_library",
"//library/net/trace:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs adds the cmd/client sub-package's sources to :package-srcs.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys-recall/cmd/client:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,40 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
# client is the manual gRPC test client executable; its code comes from the
# embedded :go_default_library.
go_binary(
name = "client",
embed = [":go_default_library"],
tags = ["automanaged"],
)
# go_default_library compiles main.go against the v1 API and the warden
# RPC library.
go_library(
name = "go_default_library",
srcs = ["main.go"],
importpath = "go-common/app/service/bbq/recsys-recall/cmd/client",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/time:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,115 @@
package main
import (
"context"
"flag"
"fmt"
"log"
"time"
"go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/library/net/rpc/warden"
xtime "go-common/library/time"
)
var (
// addr is the server address to dial; init binds it to the -addr flag.
addr string
)
// test1 sends a single Recall request to the server and prints the response
// followed by each returned video.
//
// If the RPC fails, the error is printed and the function returns without
// touching the response, which would otherwise be a nil dereference.
//
// The commented-out entries are alternative recall sources kept for manual
// experimentation.
func test1(client v1.RecsysRecallClient) {
	var infos []*v1.RecallInfo
	// infos = append(infos, &v1.RecallInfo{
	// 	Tag: "HOT_T1:30",
	// 	Name: "HOT",
	// 	Priority: 1,
	// 	Limit: 20,
	// })
	// infos = append(infos, &v1.RecallInfo{
	// 	Tag: "RECALL:HOT_T:10053",
	// 	Name: "op",
	// 	Scorer: "default",
	// 	Filter: "bloomfilter",
	// 	Priority: 2,
	// 	Limit: 10,
	// })
	infos = append(infos, &v1.RecallInfo{
		Tag:      "RECALL:HOT_T:92",
		Name:     "175",
		Scorer:   "default",
		Filter:   "bloomfilter",
		Priority: 1,
		Limit:    5,
	})
	// infos = append(infos, &v1.RecallInfo{
	// 	Tag: "bbq:recall:tagid:11",
	// 	Name: "11",
	// 	Limit: 20,
	// })
	// infos = append(infos, &v1.RecallInfo{
	// 	Tag: "bbq:recall:tagid:802",
	// 	Name: "802",
	// 	Limit: 20,
	// })
	// infos = append(infos, &v1.RecallInfo{
	// 	Tag: "bbq:recall:tagid:159",
	// 	Name: "159",
	// 	Limit: 20,
	// })
	// infos = append(infos, &v1.RecallInfo{
	// 	Tag: "bbq:recall:tagid:1604",
	// 	Name: "1604",
	// 	Priority: 20,
	// 	Limit: 20,
	// })
	req := &v1.RecallRequest{
		MID:        5829468,
		BUVID:      "d9972de637d2f3b8939ee628a7ea789b",
		Infos:      infos,
		TotalLimit: 20,
	}

	resp, err := client.Recall(context.Background(), req)
	if err != nil {
		// resp must not be used when the call failed.
		fmt.Println(err)
		return
	}
	fmt.Println(resp)
	for _, v := range resp.List {
		fmt.Println(v)
	}
	// for _, v := range resp.SrcInfo {
	// 	fmt.Println(v)
	// }
}
// func test2(client v1.RecsysRecallClient) {
// request := &v1.VideoIndexRequest{
// SVIDs: []int64{265375},
// }
// resp, err := client.VideoIndex(context.Background(), request)
// if err != nil {
// fmt.Println(err)
// return
// }
// fmt.Println(resp)
// }
// init registers the -addr flag, which defaults to 127.0.0.1:9000.
func init() {
flag.StringVar(&addr, "addr", "127.0.0.1:9000", "server addr")
}
// main parses flags, dials the server at addr with 3s dial and request
// timeouts, and runs test1 against the resulting client. A dial failure
// terminates the process through log.Fatalf.
func main() {
	flag.Parse()

	const limit = 3 * time.Second
	dialer := warden.NewClient(&warden.ClientConfig{
		Dial:    xtime.Duration(limit),
		Timeout: xtime.Duration(limit),
	})
	conn, err := dialer.Dial(context.Background(), addr)
	if err != nil {
		log.Fatalf("new client failed!err:=%v", err)
	}

	test1(v1.NewRecsysRecallClient(conn))
	// test2(client)
}

View File

@@ -0,0 +1,59 @@
package main
import (
"flag"
"os"
"os/signal"
"syscall"
"go-common/app/service/bbq/recsys-recall/conf"
"go-common/app/service/bbq/recsys-recall/server/grpc"
"go-common/app/service/bbq/recsys-recall/server/http"
"go-common/app/service/bbq/recsys-recall/service"
"go-common/app/service/bbq/recsys-recall/service/index"
ecode "go-common/library/ecode/tip"
"go-common/library/log"
"go-common/library/net/trace"
)
// main boots the recall service: it loads configuration, initialises
// logging, tracing and error codes, builds the service, loads the forward
// index, starts the gRPC and HTTP servers, and then blocks until a
// terminating signal arrives.
func main() {
	// CPU profiling hook for local debugging:
	// cpuf, err := os.Create("cpu_profile")
	// if err != nil {
	// 	panic(err)
	// }
	// pprof.StartCPUProfile(cpuf)
	// defer pprof.StopCPUProfile()
	flag.Parse()
	if err := conf.Init(); err != nil {
		panic(err)
	}

	log.Init(conf.Conf.Log)
	defer log.Close()
	log.Info("start")

	trace.Init(conf.Conf.Tracer)
	defer trace.Close()

	ecode.Init(conf.Conf.Ecode)

	srv := service.New(conf.Conf)
	// Load the forward index.
	index.Init(conf.Conf)
	grpc.New(srv)
	http.Init(conf.Conf)

	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT)
	for sig := range sigs {
		log.Info("get a signal %s", sig.String())
		switch sig {
		case syscall.SIGHUP:
			// Ignored: keep waiting for the next signal.
		case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
			log.Info("exit")
			return
		default:
			return
		}
	}
}

View File

@@ -0,0 +1,91 @@
# Logging settings; decoded into conf.Config.Log.
[log]
stdout = true
# HTTP server settings; decoded into conf.Config.BM.
[bm]
addr = "0.0.0.0:8080"
timeout = "1s"
# MySQL settings; decoded into conf.Config.MySQL.
# NOTE(review): the DSNs below embed a plain-text user/password and an
# internal host address; confirm these are throw-away test credentials
# before keeping them in version control.
[mysql]
addr = "172.16.38.91:3306"
dsn = "root:123456@tcp(172.16.38.91:3306)/bbq?timeout=200ms&readTimeout=200ms&writeTimeout=200ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"
readDSN = ["root:123456@tcp(172.16.38.91:3306)/bbq?timeout=200ms&readTimeout=200ms&writeTimeout=200ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"]
active = 20
idle = 10
idleTimeout ="4h"
queryTimeout = "100ms"
execTimeout = "100ms"
tranTimeout = "200ms"
# Redis pool settings; decoded into conf.Config.Redis.
[redis]
name = "recsys-recall-service"
proto = "tcp"
addr = "172.16.38.91:6379"
idle = 10
active = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "1m"
# Redis pool settings for bloom filters; decoded into conf.Config.BFRedis.
# In this test config it points at the same instance as [redis].
[bfredis]
name = "recsys-recall-service"
proto = "tcp"
addr = "172.16.38.91:6379"
idle = 10
active = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "1m"
# HTTP client profiles ("normal" and "slow"), each with a circuit-breaker
# sub-table.
# NOTE(review): conf.Config declares no field for this table, so it is
# presumably unused by this service; verify. The key/secret pairs are also
# committed in plain text; confirm they are test-only values.
[httpClient]
[httpClient.normal]
dial = "1s"
timeout = "10s"
keepAlive = "60s"
timer = 10
key = "7c7ac0db1aa05587"
secret = "9a6d62d93290c5f771ad381e9ca23f26"
[httpClient.normal.breaker]
window = "3s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100
[httpClient.slow]
dial = "500ms"
timeout = "1s"
keepAlive = "60s"
timer = 10
key = "7c7ac0db1aa05587"
secret = "9a6d62d93290c5f771ad381e9ca23f26"
[httpClient.slow.breaker]
window = "3s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100
# Worker pool sizing; decoded into conf.WorkPoolConfig.
[workPool]
capacity = 1024
maxWorkers = 512
maxIdleWorkers = 256
minIdleWorkers = 128
keepAlive = "30s"
# Forward index source; decoded into conf.ForwardIndexConfig.
# NOTE(review): localPath is a developer-specific absolute path and must be
# overridden on any other machine.
# The key "reloadDucation" (sic) matches the ReloadDucation struct field;
# the two spellings must be changed together or not at all.
[forwardIndex]
localPath = "/Users/daiwei/Downloads/idx.txt"
remotePath = "http://172.16.38.91/fs/out.txt"
md5Path = "http://172.16.38.91/fs/out.md5"
protocol = "http"
reloadDucation = "1800s"
# In-process cache tuning; decoded into conf.LocalCacheConfig.
# Keys listed in l1Tags get the level1 keep-alive, keys in l2Tags get
# level2, and every other key gets level3; maxAge caps an entry's total
# lifetime.
[localCache]
l1Tags = []
l2Tags = []
level1 = "300s"
level2 = "600s"
level3 = "900s"
maxAge = "1800s"

View File

@@ -0,0 +1,38 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles conf.go, the configuration loader.
go_library(
name = "go_default_library",
srcs = ["conf.go"],
importpath = "go-common/app/service/bbq/recsys-recall/conf",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/cache/redis:go_default_library",
"//library/conf:go_default_library",
"//library/database/sql:go_default_library",
"//library/ecode/tip:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
"//library/net/trace:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,119 @@
package conf
import (
"errors"
"flag"
"go-common/library/cache/redis"
"go-common/library/conf"
"go-common/library/database/sql"
ecode "go-common/library/ecode/tip"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/middleware/verify"
"go-common/library/net/trace"
xtime "go-common/library/time"
"github.com/BurntSushi/toml"
)
var (
// confPath is the local config file path, bound to the -conf flag.
confPath string
// client is the config-center client created by remote().
client *conf.Client
// Conf is the process-wide configuration filled in by Init.
Conf = &Config{}
)
// Config is the root configuration of the recall service. Each field is a
// pointer to the config type of the component it configures.
type Config struct {
Log *log.Config
BM *bm.ServerConfig
Verify *verify.Config
Tracer *trace.Config
// Redis configures the pool used for inverted-index reads and writes.
Redis *redis.Config
// BFRedis configures the pool used to load bloom filters.
BFRedis *redis.Config
MySQL *sql.Config
Ecode *ecode.Config
WorkPool *WorkPoolConfig
ForwardIndex *ForwardIndexConfig
LocalCache *LocalCacheConfig
}
// WorkPoolConfig holds worker pool sizing: a capacity, worker count bounds
// and a keep-alive duration.
// NOTE(review): the exact meaning of each field is defined by the pool that
// consumes it, which is not visible from this file.
type WorkPoolConfig struct {
Capacity uint64
MaxWorkers uint64
MaxIdleWorkers uint64
MinIdleWorkers uint64
KeepAlive xtime.Duration
}
// ForwardIndexConfig describes where the forward index comes from: a local
// path, a remote path, the path of an MD5 file, the protocol, and a reload
// interval.
type ForwardIndexConfig struct {
LocalPath string
RemotePath string
MD5Path string
Protocol string
// ReloadDucation (sic) is the reload interval. The misspelling is part of
// the config key ("reloadDucation") and cannot be fixed here alone.
ReloadDucation xtime.Duration
}
// LocalCacheConfig tunes the in-process cache (package dao/cache).
type LocalCacheConfig struct {
// L1Tags lists the keys that get the Level1 keep-alive.
L1Tags []string
Level1 xtime.Duration
// L2Tags lists the keys that get the Level2 keep-alive.
L2Tags []string
Level2 xtime.Duration
// Level3 is the keep-alive for keys in neither tag list.
Level3 xtime.Duration
// MaxAge caps the lifetime of an entry regardless of how often it is read.
MaxAge xtime.Duration
}
// init registers the -conf flag; an empty value (the default) makes Init
// load the configuration from the config center instead of a local file.
func init() {
flag.StringVar(&confPath, "conf", "", "default config path")
}
// Init loads the configuration into Conf. With no -conf path it loads from
// the config center; otherwise it decodes the local file.
func Init() error {
	if confPath == "" {
		return remote()
	}
	return local()
}
// local decodes the TOML file at confPath into Conf.
func local() (err error) {
	if _, err = toml.DecodeFile(confPath, &Conf); err != nil {
		return err
	}
	return nil
}
// remote creates the config-center client, performs the initial load, and
// starts a goroutine that reloads the configuration on every client event.
//
// It returns the error of client creation or of the initial load. Reload
// failures are only logged.
func remote() (err error) {
	if client, err = conf.New(); err != nil {
		return
	}
	if err = load(); err != nil {
		return
	}
	go func() {
		for range client.Event() {
			log.Info("config reload")
			// Use a goroutine-local error: the outer named result belongs
			// to a call that has already returned and does not hold the
			// error of this reload.
			if rerr := load(); rerr != nil {
				log.Error("config reload error (%v)", rerr)
			}
		}
	}()
	return
}
func load() (err error) {
var (
s string
ok bool
tmpConf *Config
)
if s, ok = client.Toml2(); !ok {
return errors.New("load config center error")
}
if _, err = toml.Decode(s, &tmpConf); err != nil {
return errors.New("could not decode config")
}
*Conf = *tmpConf
return
}

View File

@@ -0,0 +1,38 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles the data-access layer (bloom filter loading
# and inverted-index storage).
go_library(
name = "go_default_library",
srcs = [
"bloomfilter.go",
"dao.go",
],
importpath = "go-common/app/service/bbq/recsys-recall/dao",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/conf:go_default_library",
"//app/service/bbq/recsys-recall/dao/cache:go_default_library",
"//library/cache/redis:go_default_library",
"//vendor/github.com/Dai0522/go-hash/bloomfilter:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs adds the dao/cache sub-package's sources to :package-srcs.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys-recall/dao/cache:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,23 @@
package dao
import (
"context"
"go-common/library/cache/redis"
"github.com/Dai0522/go-hash/bloomfilter"
)
// LoadBloomFilter reads the raw value stored under key from the bloom-filter
// redis pool and decodes it with bloomfilter.Load.
//
// When the key is missing (redis.ErrNil) or the GET yields no bytes, it
// returns a nil filter together with the error from the GET.
func (d *Dao) LoadBloomFilter(ctx *context.Context, key string) (*bloomfilter.BloomFilter, error) {
	conn := d.bfredis.Get(*ctx)
	defer conn.Close()

	raw, err := redis.Bytes(conn.Do("GET", key))
	if err == redis.ErrNil || raw == nil {
		return nil, err
	}
	return bloomfilter.Load(&raw)
}

View File

@@ -0,0 +1,29 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles local.go, the in-process cache.
go_library(
name = "go_default_library",
srcs = ["local.go"],
importpath = "go-common/app/service/bbq/recsys-recall/dao/cache",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//app/service/bbq/recsys-recall/conf:go_default_library"],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,88 @@
package cache
import (
"sync"
"time"
"go-common/app/service/bbq/recsys-recall/conf"
)
// Solt is one cache entry: the cached bytes plus the bookkeeping used by
// LocalCache.Get to decide whether the entry is still valid.
type Solt struct {
data []byte
// ctime is the creation time as a Unix timestamp in seconds.
ctime int64
// maxAge is the raw configured MaxAge duration; Get divides it by
// time.Second before comparing it with timestamps.
maxAge int64
// lastUsed is the Unix timestamp (seconds) of creation or the last hit.
lastUsed int64
// keepAlived is the raw configured keep-alive duration for this key;
// Get divides it by time.Second before comparing it with timestamps.
keepAlived int64
}
// LocalCache is an in-process key/value cache whose entries expire after a
// per-key idle time and an overall maximum age.
type LocalCache struct {
// d holds the entries by key.
d map[string]*Solt
// l1tags and l2tags are sets of the keys that use the Level1 and Level2
// keep-alive; every other key uses Level3.
l1tags map[string]byte
l2tags map[string]byte
c *conf.LocalCacheConfig
// lock guards d.
lock *sync.RWMutex
}
// NewLocalCache builds an empty cache from c, turning the L1Tags and L2Tags
// lists into lookup sets.
func NewLocalCache(c *conf.LocalCacheConfig) *LocalCache {
	toSet := func(tags []string) map[string]byte {
		set := make(map[string]byte)
		for i := range tags {
			set[tags[i]] = byte(1)
		}
		return set
	}

	return &LocalCache{
		d:      make(map[string]*Solt),
		l1tags: toSet(c.L1Tags),
		l2tags: toSet(c.L2Tags),
		c:      c,
		lock:   new(sync.RWMutex),
	}
}
// Set stores val under key, replacing any existing entry, and always
// returns true.
//
// The entry's keep-alive is Level1 when key is in the L1 tag set, Level2
// when it is in the L2 tag set, and Level3 otherwise.
func (lc *LocalCache) Set(key string, val []byte) bool {
	lc.lock.Lock()
	defer lc.lock.Unlock()

	keep := lc.c.Level3
	if _, ok := lc.l1tags[key]; ok {
		keep = lc.c.Level1
	} else if _, ok := lc.l2tags[key]; ok {
		keep = lc.c.Level2
	}

	// Read the clock once so that ctime and lastUsed of a fresh entry agree.
	now := time.Now().Unix()
	lc.d[key] = &Solt{
		data:       val,
		ctime:      now,
		maxAge:     int64(lc.c.MaxAge),
		lastUsed:   now,
		keepAlived: int64(keep),
	}
	return true
}
// Get returns the bytes cached under key, or nil when there is no entry or
// the entry has expired.
//
// An entry is expired when it has not been read for longer than its
// keep-alive or when it is older than its max age. A hit refreshes the
// entry's lastUsed timestamp. Expired entries are not removed here.
func (lc *LocalCache) Get(key string) []byte {
	// A hit writes s.lastUsed, so the exclusive lock is required: under a
	// read lock concurrent readers would race on that field.
	lc.lock.Lock()
	defer lc.lock.Unlock()

	s := lc.d[key]
	if s == nil {
		return nil
	}

	current := time.Now().Unix()
	// The stored durations are raw config values; bring them to seconds so
	// they are comparable with the Unix timestamps.
	keepAlived := s.keepAlived / int64(time.Second)
	maxAge := s.maxAge / int64(time.Second)
	if keepAlived < (current-s.lastUsed) || maxAge < (current-s.ctime) {
		return nil
	}
	s.lastUsed = current
	return s.data
}

View File

@@ -0,0 +1,73 @@
package dao
import (
"context"
"go-common/app/service/bbq/recsys-recall/conf"
xcache "go-common/app/service/bbq/recsys-recall/dao/cache"
"go-common/library/cache/redis"
)
// Dao is the data-access object of the recall service.
type Dao struct {
c *conf.Config
// redis is the pool used for inverted-index reads and writes.
redis *redis.Pool
// bfredis is the pool used to load bloom filters.
bfredis *redis.Pool
// lcache is the in-process cache placed in front of redis.
lcache *xcache.LocalCache
}
// New builds a Dao from c: one redis pool for the inverted index, one for
// bloom filters, and the local cache.
func New(c *conf.Config) (dao *Dao) {
	dao = new(Dao)
	dao.c = c
	dao.redis = redis.NewPool(c.Redis)
	dao.bfredis = redis.NewPool(c.BFRedis)
	dao.lcache = xcache.NewLocalCache(c.LocalCache)
	return dao
}
// GetInvertedIndex returns the inverted index stored under key.
//
// Unless force is set, a local-cache hit is returned directly. Otherwise the
// value is read from redis, trying the GET up to three times:
//   - a missing key yields an empty, non-nil slice together with
//     redis.ErrNil;
//   - a non-nil reply is stored in the local cache and returned;
//   - after three unsuccessful attempts the result of the last one is
//     returned.
func (d *Dao) GetInvertedIndex(ctx context.Context, key string, force bool) (b []byte, err error) {
	b = d.lcache.Get(key)
	if !force && b != nil {
		return
	}

	conn := d.redis.Get(ctx)
	defer conn.Close()

	for attempt := 1; attempt <= 3; attempt++ {
		b, err = redis.Bytes(conn.Do("GET", key))
		switch {
		case err == redis.ErrNil:
			b = []byte{}
			return
		case b != nil:
			d.lcache.Set(key, b)
			return
		}
	}
	return
}
// SetInvertedIndex writes value under key in redis with SETEX and a TTL of
// 86400 seconds (one day).
func (d *Dao) SetInvertedIndex(c context.Context, key string, value []byte) error {
	conn := d.redis.Get(c)
	defer conn.Close()

	if _, err := conn.Do("SETEX", key, 86400, value); err != nil {
		return err
	}
	return nil
}
// Close releases both redis pools owned by the Dao: the inverted-index pool
// and the bloom-filter pool.
func (d *Dao) Close() {
	d.redis.Close()
	d.bfredis.Close()
}
// Ping is a health-check stub: it performs no checks and always returns nil.
func (d *Dao) Ping(c context.Context) error {
// TODO: if you need use mc,redis, please add
return nil
}

View File

@@ -0,0 +1,29 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles the model package; it has no dependencies.
go_library(
name = "go_default_library",
srcs = [
"model.go",
"tuple.go",
],
importpath = "go-common/app/service/bbq/recsys-recall/model",
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1 @@
package model

View File

@@ -0,0 +1,61 @@
package model
import (
"encoding/binary"
"math"
)
const (
	// _tupleSize is the encoded size of a Tuple in bytes:
	// 8 for Svid followed by 4 for Score.
	_tupleSize = 12
)

// Tuple is a (video id, score) pair.
type Tuple struct {
	Svid  uint64
	Score float32
}

// PriorityTuple is a Tuple annotated with a tag, a name and a priority.
type PriorityTuple struct {
	Tuple
	Tag      string
	Name     string
	Priority int32
}

// ToBytes encodes t into a new _tupleSize-byte slice: Svid as a
// little-endian uint64 followed by the IEEE-754 bits of Score as a
// little-endian uint32.
func (t *Tuple) ToBytes() []byte {
	b := make([]byte, _tupleSize)
	binary.LittleEndian.PutUint64(b[:8], t.Svid)
	binary.LittleEndian.PutUint32(b[8:], math.Float32bits(t.Score))
	return b
}

// ParseTuple decodes the first _tupleSize bytes of b, in the layout written
// by ToBytes, into a new Tuple. Any bytes beyond that are ignored.
//
// It returns nil when b is shorter than _tupleSize instead of panicking on
// the out-of-range slice.
func ParseTuple(b []byte) *Tuple {
	if len(b) < _tupleSize {
		return nil
	}
	return &Tuple{
		Svid:  binary.LittleEndian.Uint64(b[:8]),
		Score: math.Float32frombits(binary.LittleEndian.Uint32(b[8:_tupleSize])),
	}
}

// TupleSize returns the encoded size of a Tuple in bytes.
func TupleSize() int {
	return _tupleSize
}

View File

@@ -0,0 +1,33 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles server.go, the gRPC server bootstrap.
go_library(
name = "go_default_library",
srcs = ["server.go"],
importpath = "go-common/app/service/bbq/recsys-recall/server/grpc",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys-recall/service:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/time:go_default_library",
"@org_golang_google_grpc//:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,46 @@
package grpc
import (
"context"
"flag"
"time"
v1 "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/app/service/bbq/recsys-recall/service"
"go-common/library/net/rpc/warden"
xtime "go-common/library/time"
"google.golang.org/grpc"
)
var (
	// _gRPCAddr is the listen address of the gRPC server, bound to the
	// -grpc_addr flag.
	_gRPCAddr string
)

// init registers the -grpc_addr flag, which defaults to 0.0.0.0:9000.
func init() {
	// The usage text previously read "default config path", copied from the
	// -conf flag; it now describes this flag.
	flag.StringVar(&_gRPCAddr, "grpc_addr", "0.0.0.0:9000", "grpc server listen addr")
}
// New creates the gRPC server listening on _gRPCAddr with a 2s timeout,
// installs the middleware, registers srv as the RecsysRecall implementation
// and starts the server. It panics when the server fails to start.
func New(srv *service.Service) *warden.Server {
	server := warden.NewServer(&warden.ServerConfig{
		Addr:    _gRPCAddr,
		Timeout: xtime.Duration(2 * time.Second),
	})
	server.Use(middleware())
	v1.RegisterRecsysRecallServer(server.Server(), srv)

	if _, err := server.Start(); err != nil {
		panic("run server failed!" + err.Error())
	}
	return server
}
// middleware returns a unary interceptor that forwards every request to the
// next handler unchanged and passes its result straight back.
func middleware() grpc.UnaryServerInterceptor {
	return func(ctx context.Context, req interface{}, _ *grpc.UnaryServerInfo, next grpc.UnaryHandler) (interface{}, error) {
		// call chain
		return next(ctx, req)
	}
}

View File

@@ -0,0 +1,36 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles http.go, the HTTP server and its handlers.
go_library(
name = "go_default_library",
srcs = ["http.go"],
importpath = "go-common/app/service/bbq/recsys-recall/server/http",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys-recall/conf:go_default_library",
"//app/service/bbq/recsys-recall/service:go_default_library",
"//app/service/bbq/recsys-recall/service/index:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/binding:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs is the public handle on :package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,138 @@
package http
import (
"fmt"
"net/http"
grpc "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/app/service/bbq/recsys-recall/conf"
"go-common/app/service/bbq/recsys-recall/service"
"go-common/app/service/bbq/recsys-recall/service/index"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/binding"
"go-common/library/net/http/blademaster/middleware/verify"
)
var (
// srv is the service instance used by the HTTP handlers; set by Init.
srv *service.Service
// vfy is the verify middleware built from the Verify config; set by Init.
vfy *verify.Verify
)
// Init builds the service and the verify middleware from c, registers the
// routes on a default blademaster engine and starts it. It logs and panics
// when the engine fails to start.
//
// NOTE(review): this constructs its own service.Service through service.New
// instead of receiving one from the caller; confirm a separate instance for
// HTTP is intended.
func Init(c *conf.Config) {
	srv = service.New(c)
	vfy = verify.New(c.Verify)

	e := bm.DefaultServer(c.BM)
	route(e)

	err := e.Start()
	if err == nil {
		return
	}
	log.Error("bm Start error(%v)", err)
	panic(err)
}
// route registers the ping and register handlers and the endpoints under
// /bbq/internal/recall. Only /start is guarded by the verify middleware.
func route(e *bm.Engine) {
	e.Ping(ping)
	e.Register(register)

	recallGroup := e.Group("/bbq/internal/recall")
	recallGroup.GET("/start", vfy.Verify, howToStart)
	recallGroup.GET("/forward_index", forwardIndex)
	recallGroup.GET("/inverted_index", invertedIndex)
	recallGroup.GET("/recall", recall)
	recallGroup.GET("/videos", videosByIndex)
	recallGroup.POST("/new_income", newIncomeVideos)
}
// forwardIndex is a debug endpoint that writes the forward-index record of
// the requested svid as plain text.
//
// A bind failure is logged and the handler returns without writing a body
// of its own. When the svid has no forward-index record, an explanatory
// message naming the svid is written instead of the former "error: <nil>".
func forwardIndex(c *bm.Context) {
	args := struct {
		Svid uint64 `form:"svid" json:"svid" validate:"required"`
	}{}
	if err := c.Bind(&args); err != nil {
		log.Errorv(*c, log.KV("log", err))
		return
	}

	res := index.Index.Get(args.Svid)
	if res == nil {
		c.String(0, "error: forward index not found for svid %d", args.Svid)
		return
	}
	// Pass the record as an argument, not as the format string, so that a
	// '%' inside it is not interpreted as a formatting verb.
	c.String(0, "%s", res.String())
}
// invertedIndex is a placeholder: it replies with a nil payload and a nil
// error and performs no lookup.
func invertedIndex(c *bm.Context) {
c.JSON(nil, nil)
}
// recall is the HTTP counterpart of the Recall RPC for debugging. It binds
// a flat set of parameters, wraps the per-source ones into a single
// RecallInfo, and writes the service's result as JSON. On a bind failure it
// returns without writing a body of its own.
func recall(c *bm.Context) {
	var args struct {
		MID        int64  `json:"mid" form:"mid"`
		BUVID      string `json:"buvid" form:"buvid"`
		TotalLimit int32  `json:"total_limit" form:"total_limit"`
		Tag        string `json:"tag" form:"tag"`
		Name       string `json:"name" form:"name"`
		Scorer     string `json:"scorer" form:"scorer"`
		Filter     string `json:"filter" form:"filter"`
		Ranker     string `json:"ranker" form:"ranker"`
		Priority   int32  `json:"priority" form:"priority"`
		Limit      int32  `json:"limit" form:"limit"`
	}
	if err := c.Bind(&args); err != nil {
		return
	}

	// The HTTP endpoint only supports a single recall source per request.
	source := &grpc.RecallInfo{
		Tag:      args.Tag,
		Name:     args.Name,
		Scorer:   args.Scorer,
		Filter:   args.Filter,
		Ranker:   args.Ranker,
		Priority: args.Priority,
		Limit:    args.Limit,
	}
	c.JSON(srv.Recall(c, &grpc.RecallRequest{
		MID:        args.MID,
		BUVID:      args.BUVID,
		TotalLimit: args.TotalLimit,
		Infos:      []*grpc.RecallInfo{source},
	}))
}
// newIncomeVideos binds a JSON NewIncomeVideoRequest, prints it to stdout
// and writes the result of srv.NewIncomeVideo as JSON. On a bind failure it
// returns without writing a body of its own.
//
// NOTE(review): the fmt.Println looks like a leftover debug print; consider
// routing it through the logger.
func newIncomeVideos(c *bm.Context) {
	req := new(grpc.NewIncomeVideoRequest)
	err := c.BindWith(req, binding.JSON)
	if err != nil {
		return
	}
	fmt.Println(req)
	c.JSON(srv.NewIncomeVideo(c, req))
}
// videosByIndex binds a VideosByIndexRequest and writes the result of
// srv.VideosByIndex as JSON. On a bind failure it returns without writing a
// body of its own.
func videosByIndex(c *bm.Context) {
	req := new(grpc.VideosByIndexRequest)
	err := c.Bind(req)
	if err != nil {
		return
	}
	c.JSON(srv.VideosByIndex(c, req))
}
// ping is the health-check handler: when srv.Ping fails it logs the error
// and aborts the request with 503 Service Unavailable.
func ping(c *bm.Context) {
	err := srv.Ping(c)
	if err == nil {
		return
	}
	log.Error("ping error(%v)", err)
	c.AbortWithStatus(http.StatusServiceUnavailable)
}
// register replies with an empty JSON object and a nil error.
func register(c *bm.Context) {
c.JSON(map[string]interface{}{}, nil)
}
// howToStart is an example HTTP handler: it replies with a fixed string.
func howToStart(c *bm.Context) {
c.String(0, "Golang 大法好 !!!")
}

View File

@@ -0,0 +1,52 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# go_default_library compiles the recall service package.
go_library(
name = "go_default_library",
srcs = [
"bloomfilter.go",
"filter.go",
"parallel.go",
"ranker.go",
"recall.go",
"result.go",
"score.go",
"service.go",
"task.go",
],
importpath = "go-common/app/service/bbq/recsys-recall/service",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/proto:go_default_library",
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys-recall/conf:go_default_library",
"//app/service/bbq/recsys-recall/dao:go_default_library",
"//app/service/bbq/recsys-recall/model:go_default_library",
"//app/service/bbq/recsys-recall/service/index:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/Dai0522/go-hash/bloomfilter:go_default_library",
"//vendor/github.com/Dai0522/workpool:go_default_library",
"//vendor/github.com/json-iterator/go:go_default_library",
"@io_bazel_rules_go//proto/wkt:empty_go_proto",
],
)
# package-srcs globs every file in this package (private).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs adds the service/index sub-package's sources to :package-srcs.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys-recall/service/index:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,76 @@
package service
import (
"context"
"fmt"
"go-common/library/log"
"strconv"
"time"
"github.com/Dai0522/go-hash/bloomfilter"
)
const (
// _baseBfKey is the key format of the "history" bloom filter; %s is the
// decimal mid or the buvid.
_baseBfKey = "BBQ:BF:V1:%s"
// _baseBfKeyDaily is the key format of a per-day bloom filter; the first %s
// is the decimal mid or the buvid, the second a date formatted as yyyymmdd.
_baseBfKeyDaily = "BBQ:BF:V1:%s:%s"
)
// loadBloomFilter loads the bloom filters used to drop already-seen videos.
//
// For mid (when non-zero) and then for buvid (when non-empty) it loads, in
// this order, the history filter, today's daily filter and yesterday's daily
// filter. A filter that fails to load is logged and skipped, so the returned
// slice may hold anywhere between zero and six filters.
func (s *Service) loadBloomFilter(ctx *context.Context, mid int64, buvid string) (result []*bloomfilter.BloomFilter) {
	// Read the clock once so "today" and "yesterday" are always consecutive
	// days, even when the call straddles midnight.
	now := time.Now()
	dt := now.Format("20060102")
	lastDt := now.AddDate(0, 0, -1).Format("20060102")

	// load fetches one filter by key; a failure is logged under label and the
	// filter is left out of the result.
	load := func(key, label string) {
		bf, err := s.dao.LoadBloomFilter(ctx, key)
		if err != nil {
			log.Errorv(*ctx, log.KV(label, err))
			return
		}
		result = append(result, bf)
	}

	// loadAll fetches the history, today and yesterday filters of one id.
	// NOTE(review): the MID_* log keys are also used for buvid lookups, exactly
	// as before; rename them only together with whatever consumes these logs.
	loadAll := func(id string) {
		load(fmt.Sprintf(_baseBfKey, id), "MID_HIS_BF")
		load(fmt.Sprintf(_baseBfKeyDaily, id, dt), "MID_DAILY_BF")
		load(fmt.Sprintf(_baseBfKeyDaily, id, lastDt), "MID_DAILY_BF")
	}

	if mid != 0 {
		loadAll(strconv.FormatInt(mid, 10))
	}
	if buvid != "" {
		loadAll(buvid)
	}
	return result
}

View File

@@ -0,0 +1,75 @@
package service
import (
"encoding/binary"
"go-common/app/service/bbq/recsys-recall/service/index"
"github.com/Dai0522/go-hash/bloomfilter"
)
// Filter decides whether a single video must be removed from a recall list.
type Filter interface {
	// doFilter reports whether the video with the given svid is filtered out.
	doFilter(uint64) bool
}

// FilterManager is a registry of filters addressed by name.
type FilterManager struct {
	// filters holds the interface values directly; an interface is already a
	// reference, so no pointer-to-interface indirection is needed.
	filters map[string]Filter
}

// NewFilterManager returns an empty FilterManager. The variadic args are
// accepted for call-site compatibility and are ignored.
func NewFilterManager(args ...interface{}) *FilterManager {
	return &FilterManager{
		filters: make(map[string]Filter),
	}
}

// SetFilter registers f under name, replacing any filter already registered
// with that name.
func (fm *FilterManager) SetFilter(name string, f Filter) {
	fm.filters[name] = f
}

// DoFilter reports whether any of the named filters rejects svid. Names that
// are not registered (or were registered with a nil Filter) are skipped, and
// evaluation stops at the first filter that returns true.
func (fm *FilterManager) DoFilter(svid uint64, names ...string) bool {
	for _, n := range names {
		f, ok := fm.filters[n]
		if !ok || f == nil {
			continue
		}
		if f.doFilter(svid) {
			return true
		}
	}
	return false
}
// DefaultFilter rejects videos based on their forward-index record.
type DefaultFilter struct{}

// doFilter reports true (filter the video out) when svid has no forward-index
// record, when the record carries no BasicInfo, or when its state is negative
// or equal to 2.
func (f *DefaultFilter) doFilter(svid uint64) bool {
	fi := index.Index.Get(svid)
	if fi == nil || fi.BasicInfo == nil {
		return true
	}
	// State filter. The original note says "state > 0 may be recommended",
	// but the condition below also lets state 0 through.
	// NOTE(review): confirm which of the two is intended.
	state := fi.BasicInfo.State
	return state < 0 || state == 2
}
// BloomFilter rejects videos that appear in any of a set of bloom filters.
type BloomFilter struct {
	bf []*bloomfilter.BloomFilter
}

// doFilter reports whether svid, encoded as 8 little-endian bytes, might be
// contained in at least one of the wrapped bloom filters (watch-history
// filtering).
func (f *BloomFilter) doFilter(svid uint64) bool {
	key := make([]byte, 8)
	binary.LittleEndian.PutUint64(key, svid)
	for i := range f.bf {
		if f.bf[i].MightContain(key) {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,46 @@
# Bazel build file for the recsys-recall "service/index" Go package.
# Targets default to public visibility unless they override it.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
# Go library for the index package: forward index, inverted index and loaders.
go_library(
name = "go_default_library",
srcs = [
"forward_index.go",
"inverted_index.go",
"loader.go",
],
importpath = "go-common/app/service/bbq/recsys-recall/service/index",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/proto:go_default_library",
"//app/service/bbq/recsys-recall/conf:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/golang/snappy:go_default_library",
],
)
# Every file under this directory; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public alias for this package's files.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
# External test target built from inverted_index_test.go; it depends on the
# library target above.
go_test(
name = "go_default_xtest",
srcs = ["inverted_index_test.go"],
tags = ["automanaged"],
deps = ["//app/service/bbq/recsys-recall/service/index:go_default_library"],
)

View File

@@ -0,0 +1,100 @@
package index
import (
"os"
"sync"
"syscall"
"time"
"go-common/app/job/bbq/recall/proto"
"go-common/app/service/bbq/recsys-recall/conf"
"go-common/library/log"
)
var (
// Index is the package-level forward index; it is assigned by Init.
Index *ForwardIndex
// loader is the Loader that Init selects for the initial load.
loader Loader
)
// ForwardIndex is the forward index: a map from svid to its
// proto.ForwardIndex record, together with the lock and signal channel used
// by the reload loop started in Init.
type ForwardIndex struct {
// data is the current snapshot; the reload loop replaces it wholesale.
data *map[uint64]*proto.ForwardIndex
// lock is taken for writing while data is swapped and for reading in Get.
lock sync.RWMutex
// ch carries signals sent through Signal to the reload loop.
ch chan os.Signal
}
// Init performs the initial forward-index load and starts the periodic
// reload loop.
//
// The initial load uses a LocalLoader when c.ForwardIndex.LocalPath is set
// and a RemoteLoader otherwise; a load error is logged and then panics. When
// both RemotePath and MD5Path are configured, a background goroutine reloads
// the index from the remote source every ReloadDucation and swaps it in under
// the write lock. The goroutine exits on any signal other than SIGHUP
// delivered through Index.Signal.
func Init(c *conf.Config) {
	loader = &LocalLoader{
		path: c.ForwardIndex.LocalPath,
	}
	if c.ForwardIndex.LocalPath == "" {
		loader = &RemoteLoader{
			path:    c.ForwardIndex.RemotePath,
			md5Path: c.ForwardIndex.MD5Path,
			md5:     "",
		}
	}
	d, err := loader.Load()
	if err != nil {
		log.Error("Load ForwardIndex from local error %+v", err)
		panic(err)
	}
	if d == nil {
		// A loader may legitimately return (nil, nil) (RemoteLoader does so
		// when the fetched MD5 equals the one it already holds). Start from an
		// empty map so lookups never dereference a nil pointer.
		empty := make(map[uint64]*proto.ForwardIndex)
		d = &empty
	}
	Index = &ForwardIndex{
		data: d,
		ch:   make(chan os.Signal, 1),
	}
	go func(c *conf.Config) {
		if c.ForwardIndex.RemotePath == "" || c.ForwardIndex.MD5Path == "" {
			return
		}
		duration := time.Duration(c.ForwardIndex.ReloadDucation)
		// The reload loop uses its own RemoteLoader, independent of the
		// package-level loader used for the initial load.
		reloader := &RemoteLoader{
			path:    c.ForwardIndex.RemotePath,
			md5Path: c.ForwardIndex.MD5Path,
			md5:     "",
		}
		for {
			select {
			case s := <-Index.ch:
				log.Info("ForwardIndex get a signal %s", s.String())
				switch s {
				case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
					return
				case syscall.SIGHUP:
					// SIGHUP keeps the loop running.
				default:
					return
				}
			case <-time.After(duration):
				data, err := reloader.Load()
				if err != nil {
					log.Error("ForwardIndex load data %+v", err)
					continue
				}
				if data == nil {
					// Nothing new to load (unchanged MD5); this is not an
					// error, so it is not logged as one.
					continue
				}
				Index.lock.Lock()
				Index.data = data
				Index.lock.Unlock()
				log.Info("reload the positive index")
			}
		}
	}(c)
}
// Signal sends s on the index's signal channel, where the reload loop started
// by Init receives it. The channel is created with a buffer of one, so the
// send blocks while that slot is occupied and nobody is receiving.
func (pi *ForwardIndex) Signal(s os.Signal) {
pi.ch <- s
}
// Get returns the forward-index record of svid, or nil when the video is not
// indexed or no data has been loaded. It takes the read lock so it can run
// concurrently with other lookups while a reload swaps the data.
func (pi *ForwardIndex) Get(svid uint64) *proto.ForwardIndex {
	// RLock/RUnlock are used as a matching pair; the previous
	// RLocker().Lock() did the same thing in a roundabout way.
	pi.lock.RLock()
	defer pi.lock.RUnlock()
	if pi.data == nil {
		return nil
	}
	// A missing key yields the map's zero value, i.e. a nil record.
	return (*pi.data)[svid]
}

View File

@@ -0,0 +1,118 @@
package index
import (
"encoding/binary"
"errors"
)
const (
	// _invertedIndexHeadSize is the encoded header size in bytes:
	// 8 (magic) + 1 (source type) + 4 (body length).
	_invertedIndexHeadSize = 13
	// _invertedIndexBlockSize is the encoded size in bytes of one entry.
	_invertedIndexBlockSize = 8
	// _magicNumber is the value every valid header starts with.
	_magicNumber = 0xffffffffdeadbeef
)

// InvertedIndexHead is the fixed-size header of a serialized inverted index.
// All multi-byte fields are encoded big-endian.
type InvertedIndexHead struct {
	Magic      uint64
	SourceType byte
	// BodyLength is the number of entries (not bytes) that follow the header.
	BodyLength uint32
}

// InvertedIndex is a list of uint64 entries with a binary wire format:
// a 13-byte header followed by BodyLength big-endian uint64 values.
type InvertedIndex struct {
	head *InvertedIndexHead
	Data []uint64
}

// load decodes the header from b, which must be exactly
// _invertedIndexHeadSize bytes long and start with the magic number.
func (h *InvertedIndexHead) load(b []byte) error {
	if len(b) != _invertedIndexHeadSize {
		return errors.New("invalid head length")
	}
	h.Magic = binary.BigEndian.Uint64(b[:8])
	if h.Magic != _magicNumber {
		return errors.New("invalid head")
	}
	h.SourceType = b[8]
	h.BodyLength = binary.BigEndian.Uint32(b[9:])
	return nil
}

// serialize encodes the header into a new _invertedIndexHeadSize-byte slice.
func (h *InvertedIndexHead) serialize() []byte {
	b := make([]byte, _invertedIndexHeadSize)
	binary.BigEndian.PutUint64(b[:8], h.Magic)
	b[8] = h.SourceType
	binary.BigEndian.PutUint32(b[9:], h.BodyLength)
	return b
}

// Load deserializes b into ii.
//
// It returns an error, and leaves ii untouched, when b is shorter than a
// header, when the header is invalid, or when the body holds fewer entries
// than the header announces. Bytes after the announced entries are ignored.
func (ii *InvertedIndex) Load(b []byte) error {
	if len(b) < _invertedIndexHeadSize {
		return errors.New("invalid data")
	}
	head := new(InvertedIndexHead)
	if err := head.load(b[:_invertedIndexHeadSize]); err != nil {
		return err
	}
	body := b[_invertedIndexHeadSize:]
	// Validate the announced length against the bytes actually present before
	// allocating or slicing: BodyLength comes from the wire and a truncated or
	// corrupt payload must produce an error, not a panic or a huge allocation.
	if uint64(len(body)) < uint64(head.BodyLength)*_invertedIndexBlockSize {
		return errors.New("invalid item length")
	}
	data := make([]uint64, int(head.BodyLength))
	for i := range data {
		data[i] = binary.BigEndian.Uint64(body[i*_invertedIndexBlockSize:])
	}
	ii.head = head
	ii.Data = data
	return nil
}

// Serialize encodes ii.Data into the binary wire format. It also replaces
// ii.head with a fresh header (magic number, source type 1, entry count).
func (ii *InvertedIndex) Serialize() []byte {
	ii.head = &InvertedIndexHead{
		Magic:      _magicNumber,
		SourceType: byte(1),
		BodyLength: uint32(len(ii.Data)),
	}
	b := make([]byte, _invertedIndexHeadSize+len(ii.Data)*_invertedIndexBlockSize)
	// head
	copy(b, ii.head.serialize())
	// body
	body := b[_invertedIndexHeadSize:]
	for i, v := range ii.Data {
		binary.BigEndian.PutUint64(body[i*_invertedIndexBlockSize:], v)
	}
	return b
}

View File

@@ -0,0 +1,22 @@
package index_test
import (
"testing"
"go-common/app/service/bbq/recsys-recall/service/index"
)
// TestInvertedIndex checks that an inverted index survives a
// Serialize/Load round trip unchanged.
func TestInvertedIndex(t *testing.T) {
	src := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 0}
	srcII := &index.InvertedIndex{
		Data: src,
	}
	raw := srcII.Serialize()

	dstII := &index.InvertedIndex{}
	if err := dstII.Load(raw); err != nil {
		t.Fatalf("load serialized index: %v", err)
	}
	// Compare lengths first so a short result fails the test instead of
	// panicking on an out-of-range index below.
	if len(dstII.Data) != len(src) {
		t.Fatalf("incorrect length: want %d, got %d", len(src), len(dstII.Data))
	}
	for i := range src {
		if src[i] != dstII.Data[i] {
			t.Errorf("incorrect data at %d: want %d, got %d", i, src[i], dstII.Data[i])
		}
	}
}

View File

@@ -0,0 +1,122 @@
package index
import (
"encoding/hex"
"io/ioutil"
"net/http"
"os"
"strings"
"go-common/app/job/bbq/recall/proto"
"go-common/library/log"
"github.com/golang/snappy"
)
// Loader produces a complete forward-index snapshot keyed by svid.
type Loader interface {
// Load returns the snapshot, or an error when it could not be loaded.
// Implementations in this file may return (nil, nil): RemoteLoader does so
// when the remote MD5 matches the one it already holds.
Load() (*map[uint64]*proto.ForwardIndex, error)
}
// LocalLoader loads the forward index from a file on local disk.
type LocalLoader struct {
	path string
}

// Load reads the file at l.path and decodes it into a forward index.
//
// Each non-empty line is expected to be a hex-encoded, snappy-compressed,
// serialized proto.ForwardIndex. Lines that fail any decoding step are logged
// and skipped. An error is returned, with a nil result, only when the file
// cannot be opened or read.
func (l *LocalLoader) Load() (result *map[uint64]*proto.ForwardIndex, err error) {
	f, err := os.Open(l.path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	b, err := ioutil.ReadAll(f)
	if err != nil {
		// Fail at the point of failure instead of decoding a partial read and
		// handing back a half-built index together with the error.
		return nil, err
	}

	data := make(map[uint64]*proto.ForwardIndex)
	for _, v := range strings.Split(string(b), "\n") {
		if v == "" {
			continue
		}
		compressed, decErr := hex.DecodeString(v)
		if decErr != nil {
			log.Error("hex decode: [%v] [%s]", decErr, v)
			continue
		}
		raw, decErr := snappy.Decode(nil, compressed)
		if decErr != nil {
			log.Error("snappy decode: [%v] [%s]", decErr, v)
			continue
		}
		tmp := &proto.ForwardIndex{}
		if decErr = tmp.Unmarshal(raw); decErr != nil {
			log.Error("proto decode: [%v] [%s]", decErr, v)
			continue
		}
		data[tmp.SVID] = tmp
	}
	return &data, nil
}
// RemoteLoader loads the forward index over HTTP. It remembers the MD5 of the
// last snapshot it loaded so unchanged data is not downloaded again.
type RemoteLoader struct {
	path    string // URL of the index data
	md5Path string // URL of the MD5 of the index data
	md5     string // MD5 of the last successfully loaded snapshot
}

// remoteStatusError reports a non-2xx HTTP response; its value is the
// response status line.
type remoteStatusError string

// Error implements the error interface.
func (e remoteStatusError) Error() string {
	return "unexpected http status: " + string(e)
}

// fetch GETs url and returns the response body. The body is always closed,
// and a non-2xx status is reported as an error so that an error page is never
// mistaken for index data.
// NOTE(review): http.DefaultClient has no timeout, so a stalled server blocks
// the caller indefinitely; consider a client with a timeout.
func (l *RemoteLoader) fetch(url string) ([]byte, error) {
	resp, err := http.DefaultClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, remoteStatusError(resp.Status)
	}
	return ioutil.ReadAll(resp.Body)
}

// Load downloads and decodes the remote forward index.
//
// It returns (nil, nil) when the remote MD5 equals the MD5 of the snapshot
// loaded last, and (nil, err) when the MD5 or the data cannot be fetched.
// Each non-empty line of the data is expected to be a hex-encoded,
// snappy-compressed, serialized proto.ForwardIndex; lines that fail to decode
// are logged and skipped. The remembered MD5 is only advanced after the data
// has been downloaded completely.
func (l *RemoteLoader) Load() (result *map[uint64]*proto.ForwardIndex, err error) {
	md5, err := l.loadMD5()
	if err != nil || md5 == l.md5 {
		return nil, err
	}
	b, err := l.fetch(l.path)
	if err != nil {
		return nil, err
	}

	data := make(map[uint64]*proto.ForwardIndex)
	for _, v := range strings.Split(string(b), "\n") {
		if v == "" {
			// Skip blank lines (e.g. the one after a trailing newline) rather
			// than reporting them as corrupt snappy input.
			continue
		}
		raw, decErr := hex.DecodeString(v)
		if decErr != nil {
			log.Error("hex decode: %v", decErr)
			continue
		}
		raw, decErr = snappy.Decode(nil, raw)
		if decErr != nil {
			log.Error("snappy decode: %v", decErr)
			continue
		}
		tmp := &proto.ForwardIndex{}
		if decErr = tmp.Unmarshal(raw); decErr != nil {
			log.Error("proto decode: %v", decErr)
			continue
		}
		data[tmp.SVID] = tmp
	}
	l.md5 = md5
	return &data, nil
}

// loadMD5 fetches the MD5 of the remote index data as a string.
func (l *RemoteLoader) loadMD5() (string, error) {
	raw, err := l.fetch(l.md5Path)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}

View File

@@ -0,0 +1,55 @@
package service
import (
"context"
"time"
"go-common/library/log"
"github.com/Dai0522/workpool"
)
// parallel wraps every recall task in a future and submits it to the work
// pool. A failed submission is logged and retried up to three more times; the
// future is returned in the result regardless of whether submission finally
// succeeded. The result has one future per task, in task order.
func (s *Service) parallel(ctx *context.Context, tasks []*RecallTask) []*workpool.FutureTask {
	futures := make([]*workpool.FutureTask, 0, len(tasks))
	for _, task := range tasks {
		future := workpool.NewFutureTask(task)
		err := s.wp.Submit(future)
		for attempt := 0; err != nil && attempt < 3; attempt++ {
			log.Errorv(*ctx, log.KV("workpool", err))
			err = s.wp.Submit(future)
		}
		futures = append(futures, future)
	}
	return futures
}
// wait collects the outcome of every future, allowing each up to 100ms.
//
// The result has one entry per future, in order. Each entry always carries
// the Tag, Name and Priority of its recall task; the embedded Result is only
// filled in when the future produced a non-empty payload that parses. Waits
// that fail or return nothing, and payloads that do not parse, are logged and
// leave the Result at its zero value.
func (s *Service) wait(ctx context.Context, ftasks []*workpool.FutureTask) []*RecallResult {
	results := make([]*RecallResult, len(ftasks))
	for idx := range ftasks {
		future := ftasks[idx]
		task := future.T.(*RecallTask)
		entry := &RecallResult{
			Tag:      task.info.Tag,
			Name:     task.info.Name,
			Priority: task.info.Priority,
		}
		results[idx] = entry

		raw, err := future.Wait(100 * time.Millisecond)
		if err != nil || raw == nil || len(*raw) <= 0 {
			log.Errorv(ctx, log.KV("future task wait", err))
			continue
		}
		parsed, err := parseResult(raw)
		if err != nil {
			log.Errorv(ctx, log.KV("parse recall result", err))
			continue
		}
		entry.Result = *parsed
	}
	return results
}

View File

@@ -0,0 +1,63 @@
package service
import (
"go-common/app/service/bbq/recsys-recall/dao"
"go-common/app/service/bbq/recsys-recall/model"
)
// Ranker reorders a list of tuples in place.
type Ranker interface {
// doRank reorders *tuples, using comp to compare two elements.
doRank(tuples *[]*model.Tuple, comp func(interface{}, interface{}) bool)
}
// RankerManager is a registry of rankers addressed by name.
type RankerManager struct {
	rankers map[string]Ranker
}

// NewRankerManager returns a RankerManager holding a single ranker, a
// DefaultRanker built around d, registered under the name "default".
func NewRankerManager(d *dao.Dao) *RankerManager {
	return &RankerManager{
		rankers: map[string]Ranker{
			"default": &DefaultRanker{d: d},
		},
	}
}

// DoRank runs the ranker registered under name on tuples with comp. It does
// nothing when no (non-nil) ranker is registered under that name.
func (rm *RankerManager) DoRank(tuples *[]*model.Tuple, name string, comp func(interface{}, interface{}) bool) {
	ranker, found := rm.rankers[name]
	if !found || ranker == nil {
		return
	}
	ranker.doRank(tuples, comp)
}
// DefaultRanker is the ranker that NewRankerManager registers under
// "default".
type DefaultRanker struct {
// d is set by NewRankerManager.
d *dao.Dao
}
// defaultCompare reports whether tuple a has a strictly higher score than
// tuple b, i.e. whether a ranks before b in descending score order.
//
// Both model.Tuple values and *model.Tuple pointers are accepted: the ranker
// passes pointers, which the previous value-only type assertion turned into a
// runtime panic. Arguments of any other type, or nil pointers, compare as
// false.
func defaultCompare(a, b interface{}) bool {
	asTuple := func(v interface{}) (model.Tuple, bool) {
		switch t := v.(type) {
		case model.Tuple:
			return t, true
		case *model.Tuple:
			if t != nil {
				return *t, true
			}
		}
		return model.Tuple{}, false
	}
	t1, ok1 := asTuple(a)
	t2, ok2 := asTuple(b)
	if !ok1 || !ok2 {
		return false
	}
	return t1.Score > t2.Score
}
// doRank sorts *tuples in place so that for any two elements a and b with
// comp(a, b) true, a ends up before b. comp receives the *model.Tuple
// elements themselves. Elements that comp does not order keep their relative
// position (the sort is stable). A nil slice pointer or nil comp is a no-op.
//
// This is a plain insertion sort: it replaces an all-pairs swap loop that
// compared against a stale copy of the outer element and gave no stable
// ordering. It stays hand-written so the file needs no additional import.
func (dr *DefaultRanker) doRank(tuples *[]*model.Tuple, comp func(interface{}, interface{}) bool) {
	if tuples == nil || comp == nil {
		return
	}
	list := *tuples
	for i := 1; i < len(list); i++ {
		// Move list[i] left while it ranks before its left neighbour.
		for j := i; j > 0 && comp(list[j], list[j-1]); j-- {
			list[j], list[j-1] = list[j-1], list[j]
		}
	}
}

View File

@@ -0,0 +1,187 @@
package service
import (
	"context"
	"sort"

	"go-common/app/service/bbq/recsys-recall/api/grpc/v1"
	"go-common/app/service/bbq/recsys-recall/model"
	"go-common/app/service/bbq/recsys-recall/service/index"
	"go-common/library/log"

	"github.com/Dai0522/workpool"
)
const (
// _recallLimit caps the number of recall sources (request.Infos entries)
// that a single request fans out to.
_recallLimit = 50
)
// Recall runs every recall source of the request in parallel, merges the
// recalled videos and returns them together with per-source statistics.
//
// Total in the response is the number of tuples recalled across all sources
// before merging; List is the merged result bounded by request.TotalLimit;
// SrcInfo has one entry per recall source. The returned error is always nil.
func (s *Service) Recall(ctx context.Context, request *v1.RecallRequest) (*v1.RecallResponse, error) {
	// recall from redis
	ftasks := s.parallelRecall(&ctx, request)
	recallResult := s.wait(ctx, ftasks)

	// Per-source statistics and the overall number of recalled tuples.
	totalRecall := 0
	srcInfo := make([]*v1.RecallSrc, 0, len(recallResult))
	for _, src := range recallResult {
		totalRecall += len(src.Result.Tuples)
		srcInfo = append(srcInfo, &v1.RecallSrc{
			TotalHit: src.TotalHit,
			Filter:   src.FilterCount,
			Final:    src.FinalCount,
			Tag:      src.Tag,
			Name:     src.Name,
		})
	}

	// Flatten all tuples, tagging each with the source it came from.
	priorityTuples := make([]*model.PriorityTuple, 0, totalRecall)
	for _, src := range recallResult {
		for _, tuple := range src.Result.Tuples {
			priorityTuples = append(priorityTuples, &model.PriorityTuple{
				Tuple:    *tuple,
				Tag:      src.Tag,
				Name:     src.Name,
				Priority: src.Priority,
			})
		}
	}

	// merge
	videos := s.merge(ctx, priorityTuples, request.TotalLimit)
	log.Infov(ctx, log.KV("total_recall", totalRecall), log.KV("result", len(videos)))
	return &v1.RecallResponse{
		Total:   int32(totalRecall),
		List:    videos,
		SrcInfo: srcInfo,
	}, nil
}
// parallelRecall builds one recall task per recall source of the request and
// submits them all to the work pool, returning the resulting futures.
//
// At most _recallLimit sources are used; when the request carries more, the
// excess is logged and only the highest-priority ones are kept. All tasks
// share one scorer manager, one ranker manager and one filter manager; the
// latter holds the "default" filter and a "bloomfilter" filter loaded for the
// request's MID/BUVID.
func (s *Service) parallelRecall(ctx *context.Context, request *v1.RecallRequest) []*workpool.FutureTask {
	size := len(request.Infos)
	if size > _recallLimit {
		log.Errorv(*ctx, log.KV("RecallTag", size))
		size = _recallLimit
	}

	scorers := NewScorerManager(s.dao)
	rankers := NewRankerManager(s.dao)
	filters := NewFilterManager(s.dao)
	filters.SetFilter("default", &DefaultFilter{})
	filters.SetFilter("bloomfilter", &BloomFilter{bf: s.loadBloomFilter(ctx, request.MID, request.BUVID)})

	list := request.Infos
	if len(list) > _recallLimit {
		// Sort by priority and keep the first 50.
		list = recallSrcSortByPriority(list, size)
	}

	tasks := make([]*RecallTask, size)
	for i := range list {
		task := newRecallTask(ctx, s.dao, request.MID, request.BUVID, list[i])
		task.SetScorerManager(scorers)
		task.SetRankerManager(rankers)
		task.SetFilterManager(filters)
		tasks[i] = task
	}
	return s.parallel(ctx, tasks)
}
// merge deduplicates recalled tuples by svid and returns at most limit videos.
//
// Tuples are ordered with sortByScore followed by sortByPriority, then walked
// in that order. The first tuple seen for an svid creates the video (its
// score, name and tag come from that tuple); every further tuple with the
// same svid only appends an entry to the video's InvertedIndexes. Once limit
// videos exist, tuples for new svids are ignored. A video whose forward index
// is missing is logged and still returned, with a nil ForwardIndex.
//
// The result lists the videos in the order they were first encountered, so
// the ranking established by the sorts is preserved; a non-positive limit
// yields an empty result.
func (s *Service) merge(c context.Context, tuples []*model.PriorityTuple, limit int32) []*v1.Video {
	list := sortByScore(tuples)
	list = sortByPriority(list)

	capHint := len(list)
	if limit < 0 {
		capHint = 0
	} else if int(limit) < capHint {
		capHint = int(limit)
	}
	result := make([]*v1.Video, 0, capHint)
	seen := make(map[uint64]*v1.Video, capHint)

	for _, v := range list {
		hit := &v1.InvertedIndex{
			Index: v.Tag,
			Name:  v.Name,
			Score: v.Score,
		}
		if video, ok := seen[v.Svid]; ok {
			video.InvertedIndexes = append(video.InvertedIndexes, hit)
			continue
		}
		// ">=" (not ">") so that exactly limit videos are admitted; the
		// previous comparison let one extra video through.
		if int32(len(result)) >= limit {
			continue
		}
		fi := index.Index.Get(v.Svid)
		if fi == nil {
			log.Errorv(c, log.KV("forward_index", nil), log.KV("svid", v.Svid))
		}
		video := &v1.Video{
			SVID:            int64(v.Svid),
			Score:           v.Score,
			Name:            v.Name,
			InvertedIndex:   v.Tag,
			ForwardIndex:    fi,
			InvertedIndexes: []*v1.InvertedIndex{hit},
		}
		seen[v.Svid] = video
		// Appending to a slice (instead of rebuilding the list from the map
		// afterwards) keeps the sorted order; map iteration order is random.
		result = append(result, video)
	}
	return result
}
// sortByScore sorts tuples in place by descending Score and returns the same
// slice. The sort is stable: tuples with equal scores keep their relative
// order.
func sortByScore(tuples []*model.PriorityTuple) []*model.PriorityTuple {
	sort.SliceStable(tuples, func(i, j int) bool {
		return tuples[i].Score > tuples[j].Score
	})
	return tuples
}
// sortByPriority sorts tuples in place by descending Priority and returns the
// same slice. The sort is stable, so an ordering established beforehand (for
// example by score) is preserved among tuples of equal priority.
func sortByPriority(tuples []*model.PriorityTuple) []*model.PriorityTuple {
	sort.SliceStable(tuples, func(i, j int) bool {
		return tuples[i].Priority > tuples[j].Priority
	})
	return tuples
}
// recallSrcSortByPriority sorts list in place by descending Priority (stable
// for equal priorities) and returns its first limit entries. limit is clamped
// to the range [0, len(list)], so an oversized or negative limit cannot
// panic. The returned slice shares its backing array with list.
func recallSrcSortByPriority(list []*v1.RecallInfo, limit int) []*v1.RecallInfo {
	sort.SliceStable(list, func(i, j int) bool {
		return list[i].Priority > list[j].Priority
	})
	if limit < 0 {
		limit = 0
	}
	if limit > len(list) {
		limit = len(list)
	}
	return list[:limit]
}

View File

@@ -0,0 +1,95 @@
package service
import (
"encoding/binary"
"errors"
"go-common/app/service/bbq/recsys-recall/model"
)
// Result is the outcome of one recall task: three counters plus the recalled
// tuples. ToBytes and parseResult convert it to and from its byte encoding.
type Result struct {
// TotalHit, FilterCount and FinalCount are encoded, in this order, as the
// first three little-endian 32-bit values of the byte form.
TotalHit int32
FilterCount int32
FinalCount int32
// Tuples are the recalled tuples, encoded after the counters.
Tuples []*model.Tuple
}
// RecallResult is a Result labelled with the tag, name and priority of the
// recall source that produced it.
type RecallResult struct {
Result
Tag string
Name string
Priority int32
}
// ToBytes encodes the result into a newly allocated buffer and returns a
// pointer to it.
//
// Layout: TotalHit, FilterCount and FinalCount as little-endian 32-bit values
// (12 bytes), followed by the encoding of every tuple in order. The buffer is
// sized for 12 bytes per tuple.
func (rr *Result) ToBytes() *[]byte {
	buf := make([]byte, 12+12*len(rr.Tuples))

	// Header: the three counters.
	binary.LittleEndian.PutUint32(buf[0:4], uint32(rr.TotalHit))
	binary.LittleEndian.PutUint32(buf[4:8], uint32(rr.FilterCount))
	binary.LittleEndian.PutUint32(buf[8:12], uint32(rr.FinalCount))

	// Body: the tuples, back to back.
	pos := 12
	for _, tuple := range rr.Tuples {
		encoded := tuple.ToBytes()
		for i := range encoded {
			buf[pos+i] = encoded[i]
		}
		pos += len(encoded)
	}
	return &buf
}
// parseResult decodes the byte form produced by Result.ToBytes.
//
// The input must hold at least the 12-byte header (TotalHit, FilterCount and
// FinalCount as little-endian 32-bit values); a nil pointer or a shorter
// input yields an error instead of a panic. The remaining bytes are decoded
// as consecutive tuples of model.TupleSize() bytes each; trailing bytes that
// do not fill a whole tuple are ignored.
func parseResult(raw *[]byte) (*Result, error) {
	const headSize = 12
	if raw == nil || len(*raw) < headSize {
		return nil, errors.New("parse recall result invalid length")
	}
	buf := *raw

	// Header: total hit, filter count, final count.
	totalHit := binary.LittleEndian.Uint32(buf[0:4])
	filterCount := binary.LittleEndian.Uint32(buf[4:8])
	finalCount := binary.LittleEndian.Uint32(buf[8:12])

	// Body: as many whole tuples as fit.
	size := model.TupleSize()
	offset := headSize
	nblocks := (len(buf) - offset) / size
	tuples := make([]*model.Tuple, nblocks)
	for i := 0; i < nblocks; i++ {
		tuples[i] = model.ParseTuple(buf[offset : offset+size])
		offset += size
	}

	return &Result{
		TotalHit:    int32(totalHit),
		FilterCount: int32(filterCount),
		FinalCount:  int32(finalCount),
		Tuples:      tuples,
	}, nil
}

View File

@@ -0,0 +1,46 @@
package service
import (
"go-common/app/service/bbq/recsys-recall/dao"
"math/rand"
)
// Scorer assigns a score to a video.
type Scorer interface {
// doScore returns the score for the given svid; the variadic arguments are
// scorer-specific parameters.
doScore(uint64, ...interface{}) float32
}
// ScorerManager is a registry of scorers addressed by name.
type ScorerManager struct {
	scorers map[string]Scorer
}

// NewScorerManager returns a ScorerManager holding a single scorer, a
// DefaultScorer built around d, registered under the name "default".
func NewScorerManager(d *dao.Dao) *ScorerManager {
	return &ScorerManager{
		scorers: map[string]Scorer{
			"default": &DefaultScorer{d: d},
		},
	}
}

// DoScore returns the score the scorer registered under name gives to svid,
// forwarding params to it. It returns 0 when no (non-nil) scorer is
// registered under that name.
func (sm *ScorerManager) DoScore(svid uint64, name string, params ...interface{}) float32 {
	scorer, found := sm.scorers[name]
	if !found || scorer == nil {
		return float32(0)
	}
	return scorer.doScore(svid, params...)
}
// DefaultScorer is the scorer that NewScorerManager registers under
// "default".
type DefaultScorer struct {
// d is set by NewScorerManager; doScore does not use it.
d *dao.Dao
}
// doScore returns a pseudo-random value from rand.Float32. Neither svid nor
// params influence the result.
func (ds *DefaultScorer) doScore(svid uint64, params ...interface{}) float32 {
return rand.Float32()
}

View File

@@ -0,0 +1,126 @@
package service
import (
"context"
"encoding/binary"
"time"
"github.com/json-iterator/go"
"go-common/app/job/bbq/recall/proto"
"go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/app/service/bbq/recsys-recall/conf"
"go-common/app/service/bbq/recsys-recall/dao"
"go-common/app/service/bbq/recsys-recall/service/index"
"go-common/library/log"
"github.com/Dai0522/workpool"
empty "github.com/golang/protobuf/ptypes/empty"
)
// Service bundles the dependencies of the recall service.
type Service struct {
// c is the configuration the service was created with.
c *conf.Config
// dao is the data-access object created from c.
dao *dao.Dao
// wp is the work pool that recall tasks are submitted to.
wp *workpool.Pool
}
// New builds a Service from c: it creates and starts the work pool described
// by c.WorkPool and then creates the dao. It panics when the work pool cannot
// be created.
func New(c *conf.Config) (s *Service) {
	pool, err := workpool.NewWorkerPool(c.WorkPool.Capacity, &workpool.PoolConfig{
		MaxWorkers:     c.WorkPool.MaxWorkers,
		MaxIdleWorkers: c.WorkPool.MaxIdleWorkers,
		MinIdleWorkers: c.WorkPool.MinIdleWorkers,
		KeepAlive:      time.Duration(c.WorkPool.KeepAlive),
	})
	if err != nil {
		panic(err)
	}
	pool.Start()

	return &Service{
		c:   c,
		dao: dao.New(c),
		wp:  pool,
	}
}
// Ping checks the service's health by delegating to the dao's Ping and
// returning its error.
func (s *Service) Ping(c context.Context) (err error) {
return s.dao.Ping(c)
}
// Close closes the service's dao. The work pool is not touched here.
func (s *Service) Close() {
s.dao.Close()
}
// VideoIndex returns the forward-index records of the requested videos.
//
// SVIDs without a forward-index record are logged and left out, so the
// response list may be shorter than the request (and is nil when nothing was
// found). The returned error is always nil.
func (s *Service) VideoIndex(ctx context.Context, in *v1.VideoIndexRequest) (*v1.VideoIndexResponse, error) {
	var found []*proto.ForwardIndex
	for _, svid := range in.SVIDs {
		if fi := index.Index.Get(uint64(svid)); fi != nil {
			found = append(found, fi)
			continue
		}
		log.Errorv(ctx, log.KV("forward_index", nil))
	}
	resp := &v1.VideoIndexResponse{
		List: found,
	}
	return resp, nil
}
// NewIncomeVideo updates the inverted index of newly published videos: it
// serializes the request's SVIDs as an inverted index and stores the bytes
// under in.Key through the dao.
//
// It always returns a non-nil empty response; err is never assigned in this
// function, so the returned error is always nil.
// NOTE(review): whatever SetInvertedIndex returns (if anything) is not
// inspected here — confirm whether a storage failure should be surfaced.
func (s *Service) NewIncomeVideo(ctx context.Context, in *v1.NewIncomeVideoRequest) (res *empty.Empty, err error) {
res = new(empty.Empty)
// Convert the request's SVIDs to the uint64 entries the index stores.
svids := make([]uint64, len(in.SVIDs))
for i := range in.SVIDs {
svids[i] = uint64(in.SVIDs[i])
}
ii := &index.InvertedIndex{
Data: svids,
}
s.dao.SetInvertedIndex(ctx, in.Key, ii.Serialize())
return
}
// VideosByIndex returns the video ids stored under a single inverted-index
// key.
//
// The stored value is read through the dao and decoded either as the binary
// inverted-index format (when it is at least one 13-byte header long and
// starts with the magic number) or, otherwise, as a JSON array of numbers.
// Read and decode errors are returned with a nil response.
func (s *Service) VideosByIndex(ctx context.Context, in *v1.VideosByIndexRequest) (res *v1.VideosByIndexResponse, err error) {
	raw, err := s.dao.GetInvertedIndex(ctx, in.Key, true)
	if err != nil {
		return
	}
	var recallList []uint64
	// Check the length before peeking at the magic number: short payloads
	// (for example the JSON "[]" or "[1,2]") used to panic on raw[:8].
	if len(raw) >= 13 && binary.BigEndian.Uint64(raw[:8]) == 0xffffffffdeadbeef {
		ii := new(index.InvertedIndex)
		if err = ii.Load(raw); err != nil {
			log.Errorv(ctx, log.KV("Tag", in.Key), log.KV("inverted index load", err))
			return
		}
		recallList = ii.Data
	} else {
		if err = jsoniter.Unmarshal(raw, &recallList); err != nil {
			log.Errorv(ctx, log.KV("Tag", in.Key), log.KV("jsoninter", err))
			return
		}
	}
	svidList := make([]int64, len(recallList))
	for i := range recallList {
		svidList[i] = int64(recallList[i])
	}
	res = &v1.VideosByIndexResponse{
		Key:   in.Key,
		SVIDs: svidList,
	}
	return
}

View File

@@ -0,0 +1,120 @@
package service
import (
"context"
"encoding/binary"
"go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/app/service/bbq/recsys-recall/dao"
"go-common/app/service/bbq/recsys-recall/model"
"go-common/app/service/bbq/recsys-recall/service/index"
"go-common/library/log"
jsoniter "github.com/json-iterator/go"
)
// RecallTask is one unit of recall work: it recalls the videos of a single
// recall source (info) for one user/device. Its Run method does the work.
type RecallTask struct {
// ctx points at the request context; Run dereferences it for the dao call
// and for logging.
ctx *context.Context
// d is the dao Run reads the inverted index from.
d *dao.Dao
mid int64
buvid string
// info describes the recall source: Run reads its Tag, Name, Filter,
// Scorer, Ranker and Limit.
info *v1.RecallInfo
// sc, ranker and filter are nil until the corresponding Set* method is
// called; Run uses all three.
sc *ScorerManager
ranker *RankerManager
filter *FilterManager
debug bool
}
// newRecallTask returns a RecallTask for the given context, dao, user/device
// and recall source, with debug off. The scorer, ranker and filter managers
// are not set here and must be supplied through the Set* methods.
func newRecallTask(ctx *context.Context, d *dao.Dao, mid int64, buvid string, info *v1.RecallInfo) *RecallTask {
return &RecallTask{
ctx: ctx,
d: d,
mid: mid,
buvid: buvid,
info: info,
debug: false,
}
}
// SetScorerManager sets the scorer manager Run uses to score videos.
func (t *RecallTask) SetScorerManager(sc *ScorerManager) {
t.sc = sc
}
// SetRankerManager sets the ranker manager Run uses to order tuples.
func (t *RecallTask) SetRankerManager(ranker *RankerManager) {
t.ranker = ranker
}
// SetFilterManager sets the filter manager Run uses to drop videos.
func (t *RecallTask) SetFilterManager(filter *FilterManager) {
t.filter = filter
}
// SetDebug sets the task's debug flag.
func (t *RecallTask) SetDebug(d bool) {
t.debug = d
}
// Run executes the recall task and returns the encoded Result, or nil when
// the inverted index cannot be read or decoded.
//
// Steps: read the inverted index stored under the source's tag; decode it
// (binary inverted-index format when the payload is at least one 13-byte
// header long and starts with the magic number, JSON array otherwise); drop
// videos rejected by the "default" filter or the source's own filter; score
// the survivors; rank them; and keep at most info.Limit tuples. In the
// result, TotalHit is the size of the stored list, FilterCount the number of
// videos that survived filtering, and FinalCount the number kept.
func (t *RecallTask) Run() *[]byte {
	// Fetch the inverted index.
	raw, err := t.d.GetInvertedIndex(*t.ctx, t.info.Tag, false)
	if err != nil {
		log.Errorv(*t.ctx, log.KV("Tag", t.info.Tag), log.KV("redis", err))
		return nil
	}

	var recallList, svidList []uint64
	// ">= 13" so that a header-only payload (a valid, empty binary index) is
	// decoded as binary instead of being handed to the JSON decoder.
	if len(raw) >= 13 && binary.BigEndian.Uint64(raw[:8]) == 0xffffffffdeadbeef {
		ii := new(index.InvertedIndex)
		if err = ii.Load(raw); err != nil {
			log.Errorv(*t.ctx, log.KV("Tag", t.info.Tag), log.KV("inverted index load", err))
			return nil
		}
		recallList = ii.Data
	} else {
		if err = jsoniter.Unmarshal(raw, &recallList); err != nil {
			log.Errorv(*t.ctx, log.KV("Tag", t.info.Tag), log.KV("jsoninter", err))
			return nil
		}
	}

	// filter: the "default" filter always runs, in addition to the filter
	// named by the recall source.
	for _, svid := range recallList {
		if !t.filter.DoFilter(svid, "default", t.info.Filter) {
			svidList = append(svidList, svid)
		}
	}

	// score
	tuples := make([]*model.Tuple, len(svidList))
	for i, v := range svidList {
		tuples[i] = &model.Tuple{
			Svid:  v,
			Score: t.sc.DoScore(v, t.info.Scorer),
		}
	}

	// rank
	t.ranker.DoRank(&tuples, t.info.Ranker, defaultCompare)

	// truncate: clamp the limit to [0, len(tuples)] so that a negative Limit
	// in the request cannot panic the slice expression below.
	size := int(t.info.Limit)
	if size < 0 {
		size = 0
	}
	if size > len(tuples) {
		size = len(tuples)
	}

	result := &Result{
		TotalHit:    int32(len(recallList)),
		FilterCount: int32(len(svidList)),
		FinalCount:  int32(size),
		Tuples:      tuples[:size],
	}
	log.Infov(*t.ctx, log.KV("req_tag", t.info.Tag), log.KV("req_name", t.info.Name), log.KV("recall", len(recallList)), log.KV("filter", len(svidList)), log.KV("result", len(result.Tuples)))
	return result.ToBytes()
}