Create & Init Project...

This commit is contained in:
2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys/api/grpc/v1:all-srcs",
"//app/service/bbq/recsys/cmd:all-srcs",
"//app/service/bbq/recsys/conf:all-srcs",
"//app/service/bbq/recsys/dao:all-srcs",
"//app/service/bbq/recsys/model:all-srcs",
"//app/service/bbq/recsys/server/grpc:all-srcs",
"//app/service/bbq/recsys/server/http:all-srcs",
"//app/service/bbq/recsys/service:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,26 @@
# v1.0.6
fix outOfRange bug
# v1.0.5
1. postprocess打散逻辑中增加连续4刷up主不能相同逻辑
# v1.0.4
hotfix redis改key
# v1.0.3
1. 修复postprocess中打散逻辑bug
2. 增加取b站up主推荐给bbq没有关注的人
3. 保存up主推荐的last records
4. 增加up主推荐debug接口
5. up主推荐去掉关注过的up主
6. up主推荐rank增加b站up主高权重
7. postprocess代码拆分
# v1.0.2
1. 增加排序模型xgb 0.0.13
# v1.0.1
1. 增加up主推荐功能
# v1.0.0
1. 推荐系统第一期上线

View File

@@ -0,0 +1,7 @@
# Owner
liuzhiquan
# Author
# Reviewer
daiwei

View File

@@ -0,0 +1,12 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- liuzhiquan
labels:
- bbq
- service
- service/bbq/recsys
options:
no_parent_owners: true
reviewers:
- daiwei

View File

@@ -0,0 +1,12 @@
# recsys-service
# 项目简介
1. bbq推荐系统服务
# 编译环境
# 依赖包
# 编译执行

View File

@@ -0,0 +1,56 @@
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
proto_library(
name = "v1_proto",
srcs = ["api.proto"],
tags = ["automanaged"],
deps = ["@gogo_special_proto//github.com/gogo/protobuf/gogoproto"],
)
go_proto_library(
name = "v1_go_proto",
compilers = ["@io_bazel_rules_go//proto:gogofast_grpc"],
importpath = "go-common/app/service/bbq/recsys/api/grpc/v1",
proto = ":v1_proto",
tags = ["automanaged"],
deps = ["@com_github_gogo_protobuf//gogoproto:go_default_library"],
)
go_library(
name = "go_default_library",
srcs = [],
embed = [":v1_go_proto"],
importpath = "go-common/app/service/bbq/recsys/api/grpc/v1",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
"@com_github_gogo_protobuf//sortkeys:go_default_library",
"@org_golang_google_grpc//:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
syntax = "proto3";
// use {app_id}.{version} as package name
package bbq.service.recsys.v1;
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
option (gogoproto.goproto_enum_prefix_all) = false;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
// specify golang package name
option go_package = "v1";
// Recsys is the recommendation gRPC service. All three RPCs share the same
// request and response messages.
service Recsys {
// RecService is the main recommendation call.
rpc RecService (RecsysRequest) returns (RecsysResponse) {}
// RelatedRecService — presumably recommendations related to the request's
// svid; confirm against the server implementation.
rpc RelatedRecService (RecsysRequest) returns (RecsysResponse) {}
// UpsRecService — presumably uploader ("up") recommendations; confirm against
// the server implementation.
rpc UpsRecService (RecsysRequest) returns (RecsysResponse) {}
}
// RecsysRequest is the request shared by every Recsys RPC. Each field carries
// an explicit JSON tag and a `form` tag; mid, buvid and svid are additionally
// renamed to MID, BUVID and SVID in the generated Go struct.
message RecsysRequest {
// User identifier (Go field MID).
int64 mid = 1 [(gogoproto.jsontag) = "mid",(gogoproto.moretags) = "form:\"mid\"",(gogoproto.customname)= "MID"];
// Device identifier (Go field BUVID).
string buvid = 2 [(gogoproto.jsontag) = "buvid",(gogoproto.moretags) = "form:\"buvid\"",(gogoproto.customname)= "BUVID"];
// Paging: presumably the maximum number of records and the start offset — confirm.
int32 limit = 3 [(gogoproto.jsontag) = "limit",(gogoproto.moretags) = "form:\"limit\""];
int32 offset = 4 [(gogoproto.jsontag) = "offset",(gogoproto.moretags) = "form:\"offset\""];
// Video identifier (Go field SVID).
int64 svid = 5 [(gogoproto.jsontag) = "svid",(gogoproto.moretags) = "form:\"svid\"",(gogoproto.customname)= "SVID"];
string abtest = 6 [(gogoproto.jsontag) = "abtest",(gogoproto.moretags) = "form:\"abtest\""];
string traceID = 7 [(gogoproto.jsontag) = "traceID",(gogoproto.moretags) = "form:\"traceID\""];
// Debug switches; the bundled debug client sends debug_flag=true with debug_type="rank".
bool debug_flag = 8 [(gogoproto.jsontag) = "debugFlag",(gogoproto.moretags) = "form:\"debugFlag\""];
string debug_type = 9 [(gogoproto.jsontag) = "debugType",(gogoproto.moretags) = "form:\"debugType\""];
string queryID = 10 [(gogoproto.jsontag) = "queryID",(gogoproto.moretags) = "form:\"queryID\""];
string app = 11 [(gogoproto.jsontag) = "app",(gogoproto.moretags) = "form:\"app\""];
string appVersion = 12 [(gogoproto.jsontag) = "appVersion",(gogoproto.moretags) = "form:\"appVersion\""];
}
// RecsysResponse is the reply shared by every Recsys RPC.
message RecsysResponse {
// Free-form string metadata about the response (the downgrade path stores a
// downgrade marker here).
map<string, string> message = 1;
// The recommended records.
repeated RecsysRecord list = 2;
}
// RecsysRecord is one recommended item.
message RecsysRecord {
// Video identifier.
int64 svid = 1;
// Score attached to the record (the downgrade path always sets 0).
double score = 2;
// Free-form string attributes of the record (the downgrade path stores the
// recall class here).
map<string, string> map = 3;
}

View File

@@ -0,0 +1,50 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "cmd",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = ["test.toml"],
importpath = "go-common/app/service/bbq/recsys/cmd",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/conf:go_default_library",
"//app/service/bbq/recsys/server/grpc:go_default_library",
"//app/service/bbq/recsys/server/http:go_default_library",
"//app/service/bbq/recsys/service:go_default_library",
"//library/ecode/tip:go_default_library",
"//library/log:go_default_library",
"//library/net/trace:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys/cmd/client:all-srcs",
"//app/service/bbq/recsys/cmd/relatedrecsys:all-srcs",
"//app/service/bbq/recsys/cmd/upsrecsys:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,43 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "client",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
importpath = "go-common/app/service/bbq/recsys/cmd/client",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//library/cache/redis:go_default_library",
"//library/container/pool:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/Dai0522/go-hash/bloomfilter:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,82 @@
package main
import (
"context"
"encoding/binary"
"flag"
"fmt"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/library/cache/redis"
"go-common/library/container/pool"
"go-common/library/net/rpc/warden"
xtime "go-common/library/time"
"log"
"time"
"github.com/Dai0522/go-hash/bloomfilter"
)
// Command-line options of the debug client.
var (
	name string // value of the -name flag
	addr string // value of the -addr flag: address of the gRPC server to dial
)

// init registers the -name and -addr flags on the default flag set.
func init() {
	flag.CommandLine.StringVar(&addr, "addr", "127.0.0.1:9000", "server addr")
	flag.CommandLine.StringVar(&name, "name", "lily", "name")
}
// bf fetches a serialized bloom filter from Redis and prints whether it might
// contain svid (encoded as 8 little-endian bytes).
//
// The Redis address and the key "BBQ:BF:V1:5829468" are hard-coded: this is a
// manual debugging helper. Any failure is logged and the function returns
// without printing a membership result.
//
// NOTE(review): the dao package builds bloom-filter keys with a trailing date
// component; the fixed key used here has none — confirm it still exists.
func bf(svid int64) {
	conf := &redis.Config{
		Config: &pool.Config{
			Active: 10,
			Idle:   10,
		},
		Name:         "recsys-service.user_profile",
		Proto:        "tcp",
		Addr:         "172.16.38.91:6379",
		WriteTimeout: xtime.Duration(1 * time.Second),
		DialTimeout:  xtime.Duration(1 * time.Second),
		ReadTimeout:  xtime.Duration(1 * time.Second),
	}
	rp := redis.NewPool(conf)
	// A fresh pool is created on every call, so it must be released here;
	// deferred calls run in reverse order, so the connection closes first.
	defer rp.Close()

	conn := rp.Get(context.Background())
	defer conn.Close()

	b, err := redis.Bytes(conn.Do("GET", "BBQ:BF:V1:5829468"))
	if err != nil {
		// A missing key is an expected outcome; anything else is worth reporting.
		if err != redis.ErrNil {
			log.Printf("get bloom filter failed: %v", err)
		}
		return
	}
	if b == nil {
		return
	}

	filter, err := bloomfilter.Load(&b)
	if err != nil || filter == nil {
		// Without this check a failed load would be dereferenced below.
		log.Printf("load bloom filter failed: %v", err)
		return
	}
	tmp := make([]byte, 8)
	binary.LittleEndian.PutUint64(tmp, uint64(svid))
	fmt.Println(filter.MightContain(tmp))
}
// main dials the recsys gRPC server at -addr, sends one RecService request in
// "rank" debug mode for a fixed test user, prints the reply, and then prints
// each returned svid followed by the bloom-filter lookup done by bf.
func main() {
	flag.Parse()
	cfg := &warden.ClientConfig{
		Dial:    xtime.Duration(time.Second * 3),
		Timeout: xtime.Duration(time.Second * 3),
	}
	cc, err := warden.NewClient(cfg).Dial(context.Background(), addr)
	if err != nil {
		// log.Fatalf terminates the process, so nothing may follow it.
		log.Fatalf("new client failed!err:=%v", err)
	}

	var MID int64 = 5829468 // 10022647 //100011563
	buvID := "d9972de637d2f3b8939ee628a7ea789b"

	client := rpc.NewRecsysClient(cc)
	resp, err := client.RecService(context.Background(), &rpc.RecsysRequest{
		MID:       MID,
		BUVID:     buvID,
		Limit:     5,
		DebugFlag: true,
		DebugType: "rank",
	})
	if err != nil {
		log.Fatalf("RecService failed!err:=%v", err)
	}

	fmt.Printf("got Reply: %+v", resp)
	for _, v := range resp.List {
		fmt.Println(v.Svid)
		bf(v.Svid)
	}
}

View File

@@ -0,0 +1,46 @@
package main
import (
"flag"
"os"
"os/signal"
"syscall"
"go-common/app/service/bbq/recsys/conf"
"go-common/app/service/bbq/recsys/server/grpc"
"go-common/app/service/bbq/recsys/server/http"
"go-common/app/service/bbq/recsys/service"
ecode "go-common/library/ecode/tip"
"go-common/library/log"
"go-common/library/net/trace"
)
// main is the entry point of the recsys service: it loads the configuration,
// initialises logging, tracing and error codes, builds the service, starts the
// gRPC and HTTP servers and then blocks until a termination signal arrives.
func main() {
	flag.Parse()
	if err := conf.Init(); err != nil {
		panic(err)
	}

	log.Init(conf.Conf.Log)
	defer log.Close()
	log.Info("start")

	trace.Init(conf.Conf.Tracer)
	defer trace.Close()

	ecode.Init(conf.Conf.Ecode)

	svc := service.New(conf.Conf)
	grpc.New(svc)
	http.Init(conf.Conf, svc)

	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT)
	// The channel is never closed, so this loop only ends through a return.
	for sig := range sigCh {
		log.Info("get a signal %s", sig.String())
		if sig == syscall.SIGHUP {
			// SIGHUP is ignored; keep waiting.
			continue
		}
		if sig == syscall.SIGQUIT || sig == syscall.SIGTERM || sig == syscall.SIGINT {
			log.Info("exit")
		}
		return
	}
}

View File

@@ -0,0 +1,40 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "relatedrecsys",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
importpath = "go-common/app/service/bbq/recsys/cmd/relatedrecsys",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/time:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,48 @@
package main
import (
"context"
"flag"
"fmt"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/library/net/rpc/warden"
xtime "go-common/library/time"
"log"
"time"
)
// Command-line options of the related-recommendation debug client.
var (
	name string // value of the -name flag
	addr string // value of the -addr flag: address of the gRPC server to dial
)

// init registers the -name and -addr flags on the default flag set.
func init() {
	flag.CommandLine.StringVar(&addr, "addr", "127.0.0.1:9000", "server addr")
	flag.CommandLine.StringVar(&name, "name", "lily", "name")
}
// main dials the recsys gRPC server at -addr, sends one RelatedRecService
// request with fixed test values and prints the reply.
func main() {
	flag.Parse()
	cfg := &warden.ClientConfig{
		Dial:    xtime.Duration(time.Second * 3),
		Timeout: xtime.Duration(time.Second * 3),
	}
	cc, err := warden.NewClient(cfg).Dial(context.Background(), addr)
	if err != nil {
		// log.Fatalf terminates the process, so nothing may follow it.
		log.Fatalf("new client failed!err:=%v", err)
	}

	var mid int64 = 10022647 //100011563
	buvid := "123456"

	client := rpc.NewRecsysClient(cc)
	resp, err := client.RelatedRecService(context.Background(), &rpc.RecsysRequest{
		MID:    mid,
		BUVID:  buvid,
		Limit:  10,
		Offset: 0,
		SVID:   114888,
	})
	if err != nil {
		log.Fatalf("RelatedRecService failed!err:=%v", err)
	}
	fmt.Printf("got Reply: %+v", resp)
}

View File

@@ -0,0 +1,112 @@
[log]
# dir = "/tmp/log/recsys"
stdout = true
[bm]
addr = "0.0.0.0:8804"
timeout = "2s"
[mysql]
addr = "127.0.0.1:3306"
dsn = "test:test@tcp(127.0.0.1:3306)/test?timeout=200ms&readTimeout=200ms&writeTimeout=200ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"
readDSN = ["test:test@tcp(127.0.0.2:3306)/test?timeout=200ms&readTimeout=200ms&writeTimeout=200ms&parseTime=true&loc=Local&charset=utf8,utf8mb4","test:test@tcp(127.0.0.3:3306)/test?timeout=200ms&readTimeout=200ms&writeTimeout=200ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"]
active = 20
idle = 10
idleTimeout ="4h"
queryTimeout = "100ms"
execTimeout = "100ms"
tranTimeout = "200ms"
[redis]
name = "recsys-service.user_profile"
proto = "tcp"
addr = "172.16.38.91:6379"
idle = 10
active = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "1m"
[bfredis]
name = "recsys-service.bloomfilter"
proto = "tcp"
addr = "172.16.38.91:6379"
idle = 10
active = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "1m"
[memcache]
name = "recsys-service"
proto = "tcp"
addr = ""
active = 50
idle = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "24h"
[httpClient]
[httpClient.normal]
dial = "500ms"
timeout = "1s"
keepAlive = "60s"
timer = 10
key = "7c7ac0db1aa05587"
secret = "9a6d62d93290c5f771ad381e9ca23f26"
[httpClient.normal.breaker]
window = "3s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100
[httpClient.slow]
dial = "500ms"
timeout = "1s"
keepAlive = "60s"
timer = 10
key = "7c7ac0db1aa05587"
secret = "9a6d62d93290c5f771ad381e9ca23f26"
[httpClient.slow.breaker]
window = "3s"
sleep = "100ms"
bucket = 10
ratio = 0.5
request = 100
[grpcClient]
[grpcClient.user]
addr = "discovery://default/bbq.service.user"
[grpcClient.user.wardenConf]
dial = "3000ms"
timeout = "5000ms"
[grpcClient.recall]
addr = "172.22.38.129:9000"
[grpcClient.recall.wardenConf]
dial = "3000ms"
timeout = "5000ms"
[grpcClient.relation]
addr = "discovery://default/account.service.relation"
[grpcClient.relation.wardenConf]
dial = "3000ms"
timeout = "5000ms"
[infoc]
taskID = "001555"
proto = "tcp"
addr = "172.18.33.124:15140"
chanSize = 10240
[workerPool]
maxWorkers = 512
maxIdleWorkers = 256
minIdleWorkers = 128
keepAlive = "30s"

View File

@@ -0,0 +1,40 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_binary",
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
importpath = "go-common/app/service/bbq/recsys/cmd/upsrecsys",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/time:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_binary(
name = "upsrecsys",
embed = [":go_default_library"],
tags = ["automanaged"],
)

View File

@@ -0,0 +1,48 @@
package main
import (
"context"
"flag"
"fmt"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/library/net/rpc/warden"
xtime "go-common/library/time"
"log"
"time"
)
// Command-line options of the uploader-recommendation debug client.
var (
	name string // value of the -name flag
	addr string // value of the -addr flag: address of the gRPC server to dial
)

// init registers the -name and -addr flags on the default flag set.
func init() {
	flag.CommandLine.StringVar(&addr, "addr", "127.0.0.1:9000", "server addr")
	flag.CommandLine.StringVar(&name, "name", "lily", "name")
}
// main dials the recsys gRPC server at -addr, sends one UpsRecService request
// with fixed test values and prints the reply.
func main() {
	flag.Parse()
	cfg := &warden.ClientConfig{
		Dial:    xtime.Duration(time.Second * 3),
		Timeout: xtime.Duration(time.Second * 3),
	}
	cc, err := warden.NewClient(cfg).Dial(context.Background(), addr)
	if err != nil {
		// log.Fatalf terminates the process, so nothing may follow it.
		log.Fatalf("new client failed!err:=%v", err)
	}

	var mid int64 = 6622959 //100011563
	buvid := "123456"

	client := rpc.NewRecsysClient(cc)
	resp, err := client.UpsRecService(context.Background(), &rpc.RecsysRequest{
		MID:    mid,
		BUVID:  buvid,
		Limit:  10,
		Offset: 0,
		SVID:   114888,
	})
	if err != nil {
		log.Fatalf("UpsRecService failed!err:=%v", err)
	}
	fmt.Printf("got Reply: %+v", resp)
}

View File

@@ -0,0 +1,43 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["conf.go"],
importpath = "go-common/app/service/bbq/recsys/conf",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/cache/memcache:go_default_library",
"//library/cache/redis:go_default_library",
"//library/conf:go_default_library",
"//library/database/sql:go_default_library",
"//library/ecode/tip:go_default_library",
"//library/log:go_default_library",
"//library/log/infoc:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/net/trace:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,112 @@
package conf
import (
"errors"
"flag"
"go-common/library/cache/memcache"
"go-common/library/cache/redis"
"go-common/library/conf"
"go-common/library/database/sql"
ecode "go-common/library/ecode/tip"
"go-common/library/log"
"go-common/library/log/infoc"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/middleware/verify"
"go-common/library/net/rpc/warden"
"go-common/library/net/trace"
xtime "go-common/library/time"
"github.com/BurntSushi/toml"
)
var (
// confPath is the local config file path, set by the -conf flag.
confPath string
// client is the config-center client created by remote().
client *conf.Client
// Conf is the process-wide configuration, filled in by Init.
Conf = &Config{}
)
// Config is the root configuration of the recsys service, decoded from TOML
// either from a local file or from the config center.
type Config struct {
// Logging, HTTP server (blademaster), request verification and tracing settings.
Log *log.Config
BM *bm.ServerConfig
Verify *verify.Config
Tracer *trace.Config
// Redis is the general-purpose Redis pool config; BFRedis is a separate pool
// used for the bloom filters.
Redis *redis.Config
BFRedis *redis.Config
Memcache *memcache.Config
MySQL *sql.Config
Ecode *ecode.Config
Infoc *infoc.Config
// gRPC server settings.
GRPCServer *warden.ServerConfig
// GRPCClient holds downstream gRPC client settings by name; the dao looks up
// the keys "recall", "user" and "relation".
GRPCClient map[string]*GRPCConfig
// WorkerPool configures the worker pool used for parallel tasks.
WorkerPool *PoolConfig
}
// PoolConfig holds the worker-pool sizing parameters; the dao copies them
// field by field into the workpool library's own config.
type PoolConfig struct {
MaxWorkers uint64
MaxIdleWorkers uint64
MinIdleWorkers uint64
// KeepAlive is converted to a time.Duration when the pool is built.
KeepAlive xtime.Duration
}
// GRPCConfig describes one downstream gRPC dependency.
type GRPCConfig struct {
// WardenConf is the warden client configuration used to create the client.
WardenConf *warden.ClientConfig
// Addr is the dial target passed to the warden client.
Addr string
}
// init registers the -conf flag, which selects a local config file; when it is
// left empty Init falls back to the remote config center.
func init() {
	flag.CommandLine.StringVar(&confPath, "conf", "", "default config path")
}
// Init loads the configuration into Conf: from the config center when no
// -conf path was given, otherwise from the local file at that path.
func Init() error {
	if confPath == "" {
		return remote()
	}
	return local()
}
// local decodes the TOML file at confPath into Conf.
func local() (err error) {
	_, decodeErr := toml.DecodeFile(confPath, &Conf)
	return decodeErr
}
// remote creates the config-center client, performs the initial load and then
// starts a goroutine that reloads the configuration on every client event.
//
// It returns the error of client creation or of the initial load; reload
// failures are only logged.
func remote() (err error) {
	if client, err = conf.New(); err != nil {
		return
	}
	if err = load(); err != nil {
		return
	}
	go func() {
		for range client.Event() {
			log.Info("config reload")
			// Keep the reload error local to the goroutine: the function's named
			// result is already nil here and belongs to the returned call, so
			// logging it would always print <nil> and hide the real failure.
			if reloadErr := load(); reloadErr != nil {
				log.Error("config reload error (%v)", reloadErr)
			}
		}
	}()
	return
}
func load() (err error) {
var (
s string
ok bool
tmpConf *Config
)
if s, ok = client.Toml2(); !ok {
return errors.New("load config center error")
}
if _, err = toml.Decode(s, &tmpConf); err != nil {
return err
}
*Conf = *tmpConf
return
}

View File

@@ -0,0 +1,57 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"bloomfilter.go",
"dao.go",
"parallel.go",
"recall.go",
"relation.go",
"user.go",
],
importpath = "go-common/app/service/bbq/recsys/dao",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/conf:go_default_library",
"//app/service/bbq/recsys/dao/parallel:go_default_library",
"//app/service/bbq/recsys/model:go_default_library",
"//app/service/bbq/recsys/service/retrieve:go_default_library",
"//app/service/bbq/search/api/grpc/v1:go_default_library",
"//app/service/bbq/user/api:go_default_library",
"//app/service/main/relation/api:go_default_library",
"//library/cache/memcache:go_default_library",
"//library/cache/redis:go_default_library",
"//library/database/sql:go_default_library",
"//library/log:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//vendor/github.com/Dai0522/go-hash/bloomfilter:go_default_library",
"//vendor/github.com/Dai0522/workpool:go_default_library",
"//vendor/github.com/json-iterator/go:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys/dao/parallel:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,105 @@
package dao
import (
"context"
"errors"
"fmt"
"strconv"
"time"
"go-common/app/service/bbq/recsys/dao/parallel"
"go-common/library/log"
"go-common/library/cache/redis"
"github.com/Dai0522/go-hash/bloomfilter"
"github.com/Dai0522/workpool"
)
// _baseBfKey is the Redis key layout of a bloom filter:
// "BBQ:BF:V1:<key>:<yyyymmdd>".
const _baseBfKey = "BBQ:BF:V1:%s:%s"

// userBFRedisKey returns the bloom-filter Redis key for k, suffixed with the
// current local date formatted as yyyymmdd.
func userBFRedisKey(k string) string {
	today := time.Now().Format("20060102")
	return fmt.Sprintf(_baseBfKey, k, today)
}
// loadBF reads today's bloom filters for buvid and mid (whichever are set)
// from the bloom-filter Redis in parallel and merges every filter that loads
// successfully.
//
// Each lookup is awaited for at most 100ms; failed, empty or undecodable
// results are logged (where applicable) and skipped. When nothing could be
// loaded a new filter is created with bloomfilter.New(1000, 0.0001), and err
// is whatever that call returns.
func (d *Dao) loadBF(c context.Context, mid int64, buvid string) (bf *bloomfilter.BloomFilter, err error) {
	// Collect the keys first, device key before user key.
	keys := make([]string, 0, 2)
	if buvid != "" {
		keys = append(keys, userBFRedisKey(buvid))
	}
	if mid != 0 {
		keys = append(keys, userBFRedisKey(strconv.FormatInt(mid, 10)))
	}

	var tasks []workpool.Task
	for _, key := range keys {
		tasks = append(tasks, parallel.NewRedisTask(&c, d.bfRedis, "GET", key))
	}

	for _, future := range *d.parallelTask(tasks) {
		raw, waitErr := future.Wait(100 * time.Millisecond)
		switch {
		case waitErr != nil && waitErr != redis.ErrNil:
			log.Errorv(c, log.KV("BF_GET_ERROR", waitErr), log.KV("TASK", future.T.(*parallel.RedisTask)))
			continue
		case raw == nil || len(*raw) == 0:
			continue
		}

		loaded, loadErr := bloomfilter.Load(raw)
		if loadErr != nil || loaded == nil {
			log.Errorv(c, log.KV("BF_LOAD_ERROR", loadErr), log.KV("TASK", future.T.(*parallel.RedisTask)), log.KV("raw", *raw))
			continue
		}
		bf = bloomfilter.Merge(bf, loaded)
	}

	if bf != nil {
		return
	}
	bf, err = bloomfilter.New(1000, 0.0001)
	return
}
// WriteBF adds the given svids to the caller's bloom filter and writes the
// serialized filter back to Redis under today's buvid key and/or mid key with
// an 86400-second expiry.
//
// It returns (false, err) when both mid and buvid are empty or when the
// current filter cannot be loaded. Otherwise it returns true together with
// the error of the last awaited write; write errors are also logged.
func (d *Dao) WriteBF(c context.Context, mid int64, buvid string, svid []uint64) (bool, error) {
	if mid == 0 && buvid == "" {
		return false, errors.New("mid && buvid can't be empty")
	}

	// Start from whatever is already stored for this user/device.
	filter, err := d.loadBF(c, mid, buvid)
	if err != nil {
		return false, err
	}
	for i := range svid {
		filter.PutUint64(svid[i])
	}

	// Persist the same payload under every applicable key.
	payload := filter.Serialized()
	keys := make([]string, 0, 2)
	if buvid != "" {
		keys = append(keys, userBFRedisKey(buvid))
	}
	if mid != 0 {
		keys = append(keys, userBFRedisKey(strconv.FormatInt(mid, 10)))
	}
	var tasks []workpool.Task
	for _, key := range keys {
		tasks = append(tasks, parallel.NewRedisTask(&c, d.bfRedis, "SETEX", key, 86400, *payload))
	}

	for _, future := range *d.parallelTask(tasks) {
		if _, err = future.Wait(100 * time.Millisecond); err != nil {
			log.Errorv(c, log.KV("BF_SET_ERROR", err))
		}
	}
	return true, err
}

View File

@@ -0,0 +1,96 @@
package dao
import (
"context"
relation "go-common/app/service/main/relation/api"
"time"
recallv1 "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/app/service/bbq/recsys/conf"
searchv1 "go-common/app/service/bbq/search/api/grpc/v1"
user "go-common/app/service/bbq/user/api"
"go-common/library/cache/memcache"
"go-common/library/cache/redis"
xsql "go-common/library/database/sql"
"go-common/library/net/rpc/warden"
"github.com/Dai0522/workpool"
)
// Dao bundles the storage pools, the worker pool and the downstream gRPC
// clients used by the recsys service.
type Dao struct {
c *conf.Config
// mc is the memcache pool. NOTE(review): New in this file does not assign it.
mc *memcache.Pool
// redis is the general-purpose Redis pool; bfRedis is the pool used for
// bloom filters.
redis *redis.Pool
bfRedis *redis.Pool
db *xsql.DB
// wp runs the parallel tasks submitted by the dao.
wp *workpool.Pool
// Downstream gRPC clients. NOTE(review): New in this file does not assign
// SearchClient.
SearchClient searchv1.SearchClient
RecallClient recallv1.RecsysRecallClient
UserClient user.UserClient
RelationClient relation.RelationClient
}
// New builds a Dao from c: it creates and starts the worker pool, opens the
// two Redis pools and the MySQL handle, and dials the "recall", "user" and
// "relation" gRPC clients. It panics if the worker pool cannot be created or
// if any gRPC dial fails.
//
// NOTE(review): the mc and SearchClient fields are left at their zero value.
func New(c *conf.Config) (dao *Dao) {
	pool, err := workpool.NewWorkerPool(1024, &workpool.PoolConfig{
		MaxWorkers:     c.WorkerPool.MaxWorkers,
		MaxIdleWorkers: c.WorkerPool.MaxIdleWorkers,
		MinIdleWorkers: c.WorkerPool.MinIdleWorkers,
		KeepAlive:      time.Duration(c.WorkerPool.KeepAlive),
	})
	if err != nil {
		panic(err)
	}
	pool.Start()

	built := new(Dao)
	built.c = c
	built.redis = redis.NewPool(c.Redis)
	built.bfRedis = redis.NewPool(c.BFRedis)
	built.db = xsql.NewMySQL(c.MySQL)
	built.wp = pool
	built.RecallClient = newRecallClient(c.GRPCClient["recall"])
	built.UserClient = newUserClient(c.GRPCClient["user"])
	built.RelationClient = newRelationClient(c.GRPCClient["relation"])
	return built
}
// newRecallClient dials cfg.Addr with the warden settings in cfg and wraps the
// connection in a recall-service client. It panics if the dial fails.
func newRecallClient(cfg *conf.GRPCConfig) recallv1.RecsysRecallClient {
	wardenClient := warden.NewClient(cfg.WardenConf)
	conn, err := wardenClient.Dial(context.Background(), cfg.Addr)
	if err != nil {
		panic(err)
	}
	return recallv1.NewRecsysRecallClient(conn)
}
// newUserClient dials cfg.Addr with the warden settings in cfg and wraps the
// connection in a user-service client. It panics if the dial fails.
func newUserClient(cfg *conf.GRPCConfig) user.UserClient {
	wardenClient := warden.NewClient(cfg.WardenConf)
	conn, err := wardenClient.Dial(context.Background(), cfg.Addr)
	if err != nil {
		panic(err)
	}
	return user.NewUserClient(conn)
}
// newRelationClient dials cfg.Addr with the warden settings in cfg and wraps
// the connection in a relation-service client. It panics if the dial fails.
func newRelationClient(cfg *conf.GRPCConfig) relation.RelationClient {
	wardenClient := warden.NewClient(cfg.WardenConf)
	conn, err := wardenClient.Dial(context.Background(), cfg.Addr)
	if err != nil {
		panic(err)
	}
	return relation.NewRelationClient(conn)
}
// Close releases the memcache pool (when one exists), both Redis pools and the
// database handle.
func (d *Dao) Close() {
	// New in this file never assigns d.mc, so it is nil for every Dao built
	// there; closing it unconditionally would call a method on a nil pool.
	if d.mc != nil {
		d.mc.Close()
	}
	d.redis.Close()
	d.bfRedis.Close()
	d.db.Close()
}
// Ping reports whether the database is reachable; memcache and Redis are not
// checked.
func (d *Dao) Ping(c context.Context) error {
	// TODO: add mc and redis checks here if they are needed.
	err := d.db.Ping(c)
	return err
}

View File

@@ -0,0 +1,58 @@
package dao
import (
"go-common/app/service/bbq/recsys/dao/parallel"
"github.com/Dai0522/workpool"
)
// parallelTask2 submits every task in the map to the worker pool and returns
// the resulting future tasks under the same keys. A submission that fails is
// retried up to three more times; the future is returned even if every
// attempt failed.
func (d *Dao) parallelTask2(tasks map[string]workpool.Task) map[string]workpool.FutureTask {
	futures := make(map[string]workpool.FutureTask)
	for key, t := range tasks {
		future := workpool.NewFutureTask(t)
		// One initial attempt plus at most three retries.
		for attempt := 0; ; attempt++ {
			if err := d.wp.Submit(future); err == nil || attempt >= 3 {
				break
			}
		}
		futures[key] = *future
	}
	return futures
}
// parallelTask submits every task to the worker pool and returns the future
// tasks in the same order. A submission that fails is retried up to three
// more times; the future is returned even if every attempt failed.
func (d *Dao) parallelTask(tasks []workpool.Task) *[]workpool.FutureTask {
	futures := make([]workpool.FutureTask, len(tasks))
	for i, t := range tasks {
		future := workpool.NewFutureTask(t)
		// One initial attempt plus at most three retries.
		for attempt := 0; ; attempt++ {
			if err := d.wp.Submit(future); err == nil || attempt >= 3 {
				break
			}
		}
		futures[i] = *future
	}
	return &futures
}
// ParallelRedis submits every Redis task to the worker pool and returns the
// future tasks in the same order. Each future wraps a pointer to the element
// of the caller's slice. A submission that fails is retried up to three more
// times; the future is returned even if every attempt failed.
func (d *Dao) ParallelRedis(tasks *[]parallel.RedisTask) *[]workpool.FutureTask {
	// list shares its backing array with *tasks, so &list[i] still points at
	// the caller's element.
	list := *tasks
	futures := make([]workpool.FutureTask, len(list))
	for i := range list {
		future := workpool.NewFutureTask(&list[i])
		// One initial attempt plus at most three retries.
		for attempt := 0; ; attempt++ {
			if err := d.wp.Submit(future); err == nil || attempt >= 3 {
				break
			}
		}
		futures[i] = *future
	}
	return &futures
}

View File

@@ -0,0 +1,32 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["redis.go"],
importpath = "go-common/app/service/bbq/recsys/dao/parallel",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/cache/redis:go_default_library",
"//library/log:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,63 @@
package parallel
import (
"context"
"unsafe"
"go-common/library/cache/redis"
"go-common/library/log"
)
// RedisTask is a single Redis command prepared for execution by Run.
type RedisTask struct {
// ctx points at the context used to obtain a connection from the pool.
ctx *context.Context
// name is an optional label; only NewRedisTaskWithName sets it.
name string
// pool is the Redis pool the connection is taken from.
pool *redis.Pool
// cmd and args are passed unchanged to the connection's Do method.
cmd string
args []interface{}
}
// NewRedisTaskWithName returns a RedisTask labelled name that runs cmd with
// args on a connection taken from pool using *ctx.
func NewRedisTaskWithName(ctx *context.Context, name string, pool *redis.Pool, cmd string, args ...interface{}) *RedisTask {
	task := new(RedisTask)
	task.ctx, task.name = ctx, name
	task.pool = pool
	task.cmd, task.args = cmd, args
	return task
}
// NewRedisTask returns an unnamed RedisTask that runs cmd with args on a
// connection taken from pool using *ctx.
func NewRedisTask(ctx *context.Context, pool *redis.Pool, cmd string, args ...interface{}) *RedisTask {
	task := new(RedisTask)
	task.ctx = ctx
	task.pool = pool
	task.cmd, task.args = cmd, args
	return task
}
// Run executes the task's command on a pooled connection and returns the
// reply as a pointer to a byte slice. When the command fails the error is
// logged and the result is nil.
func (rt *RedisTask) Run() (result *[]byte) {
conn := rt.pool.Get(*rt.ctx)
defer conn.Close()
reply, err := conn.Do(rt.cmd, rt.args...)
if err != nil {
log.Error("RedisTask Run error:[%+v]", err)
return
}
switch reply := reply.(type) {
case []byte:
result = &reply
case string:
// String replies are copied into a new byte slice.
b := []byte(reply)
result = &b
default:
// NOTE(review): for any other reply type (including a nil reply) the
// address of the interface value itself is reinterpreted as *[]byte. The
// result is not a real byte slice, so reading it as one is unsafe.
// Presumably callers cast it back to interface{} — confirm before
// changing this branch.
result = (*[]byte)(unsafe.Pointer(&reply))
}
return
}

View File

@@ -0,0 +1,68 @@
package dao
import (
"context"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/retrieve"
"go-common/library/cache/redis"
"go-common/library/log"
"strconv"
"strings"
)
// DownGradeRecall builds the fallback (downgrade) response from two
// comma-separated svid lists stored in Redis: the default hot list and the
// operator-selected list.
//
// The response message carries model.ResponseDownGrade = "2". Every svid that
// parses becomes a record with score 0, tagged with the recall class of the
// list it came from; empty or malformed entries are skipped. A missing key is
// not an error. Other Redis errors are logged; as before, the returned err
// reflects only the second lookup.
func (d *Dao) DownGradeRecall(c context.Context) (response *recsys.RecsysResponse, err error) {
	conn := d.redis.Get(c)
	defer conn.Close()

	// Build the response once. Re-creating it between the two lookups would
	// drop the downgrade marker set here.
	response = new(recsys.RecsysResponse)
	response.Message = make(map[string]string)
	response.Message[model.ResponseDownGrade] = "2"
	records := make([]*recsys.RecsysRecord, 0)

	// appendRecords turns a comma-separated svid list into records of the
	// given recall class. Splitting an empty string yields one empty field,
	// so unparsable fields are skipped rather than emitted as svid 0.
	appendRecords := func(list, class string) {
		for _, field := range strings.Split(list, ",") {
			svid, parseErr := strconv.ParseInt(field, 10, 64)
			if parseErr != nil {
				continue
			}
			record := &recsys.RecsysRecord{
				Svid:  svid,
				Score: 0,
				Map:   make(map[string]string),
			}
			record.Map[model.RecallClasses] = class
			records = append(records, record)
		}
	}

	var str string
	key := retrieve.RecallHotDefault
	if str, err = redis.String(conn.Do("GET", key)); err != nil {
		if err == redis.ErrNil {
			err = nil
		} else {
			log.Errorw(c, "recall", "get hot recall error", "err", err)
		}
	}
	appendRecords(str, retrieve.HotRecall)

	key = retrieve.RecallOpVideoKey
	if str, err = redis.String(conn.Do("GET", key)); err != nil {
		if err == redis.ErrNil {
			err = nil
		} else {
			log.Errorw(c, "recall", "get selection recall error", "err", err)
		}
	}
	appendRecords(str, retrieve.SelectionRecall)

	response.List = records
	return
}

View File

@@ -0,0 +1,46 @@
package dao
import (
"context"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/user/api"
"go-common/library/log"
)
// GetUserFollow asks the user service for the accounts mid follows and marks
// each of them with 1 in u.BBQFollow. It does nothing when mid is 0. On a
// service error the error is logged and returned, and u is left untouched.
//
// u.BBQFollow is written directly, so the caller must pass a profile whose
// map is already initialised.
func (d *Dao) GetUserFollow(c context.Context, mid int64, u *model.UserProfile) (err error) {
	if mid == 0 {
		return
	}
	relationReq := &api.ListRelationReq{Mid: mid}
	listRelationReply, err := d.UserClient.ListFollow(c, relationReq)
	if err != nil {
		// Include the error and the user id; a bare Errorv(c) records neither.
		log.Errorv(c, log.KV("USER_FOLLOW_ERROR", err), log.KV("mid", mid))
		return
	}
	for _, MID := range listRelationReply.List {
		u.BBQFollow[MID] = 1
	}
	return
}
// GetUserBlack asks the user service for the accounts mid has blacklisted and
// marks each of them with 1 in u.BBQBlack. It does nothing when mid is 0. On a
// service error the error is logged and returned, and u is left untouched.
//
// u.BBQBlack is written directly, so the caller must pass a profile whose map
// is already initialised.
func (d *Dao) GetUserBlack(c context.Context, mid int64, u *model.UserProfile) (err error) {
	if mid == 0 {
		return
	}
	relationReq := &api.ListRelationReq{Mid: mid}
	listRelationReply, err := d.UserClient.ListBlack(c, relationReq)
	if err != nil {
		// Include the error and the user id; a bare Errorv(c) records neither.
		log.Errorv(c, log.KV("USER_BLACK_ERROR", err), log.KV("mid", mid))
		return
	}
	for _, MID := range listRelationReply.List {
		u.BBQBlack[MID] = 1
	}
	return
}

View File

@@ -0,0 +1,496 @@
package dao
import (
"context"
"fmt"
"strconv"
"strings"
"time"
"unsafe"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/dao/parallel"
"go-common/app/service/bbq/recsys/model"
"go-common/library/cache/redis"
"go-common/library/log"
"github.com/Dai0522/workpool"
"github.com/json-iterator/go"
)
// Task names and Redis key layouts used by the user-profile code.
const (
// Task* are string identifiers for the individual user-data lookups.
TaskLastPage = "TaskLastPage"
TaskLastUpsPage = "TaskLastUpsPage"
TaskBiliUserProfile = "TaskBiliUserProfile"
TaskBBQUserProfile = "TaskBBQUserProfile"
TaskBBQDeviceProfile = "TaskBBQDeviceProfile"
TaskUserLike = "TaskUserLike"
TaskUserLikeYesterday = "TaskUserLikeYesterday"
TaskUserPlay = "TaskUserPlay"
TaskUserPlayYesterday = "TaskUserPlayYesterday"
TaskDevicePlay = "TaskDevicePlay"
TaskDevicePlayYesterday = "TaskDevicePlayYesterday"
TaskUserFollow = "TaskUserFollow"
TaskUserFollowYesterday = "TaskUserFollowYesterday"
// Redis key formats; %d takes a numeric user id and %s a string argument.
//_BBQDeviceProfileKey = "bbq:device:profile:%s"
_BBQDeviceProfileKey = "bbq:device:profile:{buvid}:%s"
_BBQUserProfileKey = "bbq:user:profile:%d"
_BiliUserProfileKey = "bbq:user:basic:%d"
// Recommendation history for the main feed, keyed by mid (1) or buvid (2).
_LastFewPageRecords1 = "bbq:last:v1:mid:%d"
_LastFewPageRecords2 = "bbq:last:v1:buvid:%s"
// Recommendation history for uploader recommendations, keyed by mid (1) or buvid (2).
_LastFewUpsPageRecords1 = "bbq:last:v1:ups:mid:%d"
_LastFewUpsPageRecords2 = "bbq:last:v1:ups:buvid:%s"
// Real-time behaviour keys. NOTE(review): the second placeholder is
// presumably a date string — confirm against the callers.
_RealTimeUserLike = "storm:v2:u:%d:like:%s"
_RealTimeUserPlayMID = "storm:v2:u:%d:%s:view:100"
_RealTimeUserPlayBuvID = "storm:v2:u:%s:%s:view:100"
_RealTimeUserFollow = "storm:v2:u:%d:%s:follow:100"
// _ModelTest is the key InitModel reads the feature weights from.
_ModelTest = "bbq:model:init"
_Zone = "zone"
_Tag = "tag"
_Up = "up"
)
// LastPageRedisKey returns the redis key under which the recently served
// records of the main recommendation flow are kept: the mid based key for a
// positive mid, otherwise the buvid based key.
func (d *Dao) LastPageRedisKey(mid int64, buvid string) (key string) {
	if mid <= 0 {
		return fmt.Sprintf(_LastFewPageRecords2, buvid)
	}
	return fmt.Sprintf(_LastFewPageRecords1, mid)
}
// LastUpsPageRedisKey returns the redis key under which the recently served
// records of the ups recommendation flow are kept: the mid based key for a
// positive mid, otherwise the buvid based key.
func (d *Dao) LastUpsPageRedisKey(mid int64, buvid string) (key string) {
	if mid <= 0 {
		return fmt.Sprintf(_LastFewUpsPageRecords2, buvid)
	}
	return fmt.Sprintf(_LastFewUpsPageRecords1, mid)
}
// InitModel reads the model weights stored in redis and copies them into
// weights.
//
// The stored value is a comma separated list of "feature:weight" pairs. Pairs
// without a ':' are ignored. A weight that is not a valid float is logged and
// skipped instead of being recorded as 0, so one malformed entry cannot
// silently zero out a feature.
//
// Loading is best effort: redis failures are logged (a missing key is not
// treated as a failure) and the returned error is always nil, which matches
// what callers have observed so far. weights must be a non-nil map.
func (d *Dao) InitModel(c context.Context, weights map[string]float64) (err error) {
	conn := d.redis.Get(c)
	defer conn.Close()
	key := _ModelTest
	result, getErr := redis.String(conn.Do("GET", key))
	if getErr != nil {
		if getErr != redis.ErrNil {
			log.Error("redis GET(%s) failed error(%v)", key, getErr)
		}
		return
	}
	for _, field := range strings.Split(result, ",") {
		featureWeightPair := strings.Split(field, ":")
		if len(featureWeightPair) < 2 {
			continue
		}
		weight, parseErr := strconv.ParseFloat(featureWeightPair[1], 64)
		if parseErr != nil {
			log.Error("model weight parse failed field(%s) error(%v)", field, parseErr)
			continue
		}
		weights[featureWeightPair[0]] = weight
	}
	return
}
// StoreRecResults remembers the records that were just served so later
// requests can de-duplicate and scatter against them.
//
// It appends every record of response that carries both an uploader mid and a
// scatter tag to lastRecords, trims the history by one page once it holds more
// than ten pages, and stores the JSON encoded history for 24 hours under the
// key produced by getKeyFunc (this is how the main and the ups flows are told
// apart). Afterwards all served svids are written to the bloom filter via
// WriteBF; a bloom filter failure is only logged.
//
// The returned error is the JSON encoding or redis SETEX error, if any. u is
// not used by the current implementation.
func (d *Dao) StoreRecResults(c context.Context, u *model.UserProfile, mid int64, buvid string, response *recsys.RecsysResponse, getKeyFunc func(int64, string) string, lastRecords []model.Record4Dup) (err error) {
	const maxPageNum = 10
	conn := d.redis.Get(c)
	defer conn.Close()
	key := getKeyFunc(mid, buvid)
	size := len(response.List)
	if len(lastRecords) > maxPageNum*size {
		// Drop the oldest page worth of records.
		lastRecords = lastRecords[size:]
	}
	for _, record := range response.List {
		// upMID is the uploader's mid as a string; it is deliberately not
		// called mid so the viewer's mid parameter is not shadowed.
		upMID, hasUp := record.Map[model.UperMid]
		tag, hasTag := record.Map[model.ScatterTag]
		if hasUp && hasTag {
			lastRecords = append(lastRecords, model.Record4Dup{
				SVID: record.Svid,
				MID:  upMID,
				Tag:  tag,
			})
		}
	}
	payload, marshalErr := jsoniter.Marshal(lastRecords)
	if marshalErr != nil {
		// Never overwrite the stored history with an empty payload.
		err = marshalErr
		log.Error("marshal last few records error(%v)", err)
	} else if _, err = conn.Do("SETEX", key, 86400, payload); err != nil {
		log.Error("store last few records error(%v)", err)
	}
	// write bloomfilter for es
	svids := make([]uint64, len(response.List))
	for i, v := range response.List {
		svids[i] = uint64(v.Svid)
	}
	if _, bfErr := d.WriteBF(c, mid, buvid, svids); bfErr != nil {
		log.Errorv(c, log.KV("Write BF error: ", bfErr))
	}
	return
}
// InitUserProfile returns a blank profile for the given mid/buvid pair.
//
// Gender starts at -1 and every map used by the loaders, as well as the
// ViewVideos and LastRecords slices, is allocated empty so the set* helpers
// can write into the profile without nil checks. LastUpsRecords, DedupVideos
// and BloomFilter keep their zero values.
func (d *Dao) InitUserProfile(c context.Context, mid int64, buvid string) (u *model.UserProfile) {
	scores := func() map[string]float64 { return make(map[string]float64) }
	counts := func() map[int64]int64 { return make(map[int64]int64) }

	u = new(model.UserProfile)
	u.Mid = mid
	u.Buvid = buvid
	u.Gender = -1
	u.ViewVideos = make([]int64, 0)
	u.LastRecords = make([]model.Record4Dup, 0)

	// bili side profile
	u.Zones1 = scores()
	u.BiliTags = scores()
	u.Zones2 = scores()
	u.FollowUps = counts()

	// bbq side profile
	u.BBQTags = scores()
	u.BBQZones = scores()
	u.BBQPrefUps = counts()
	u.BBQFollowAction = counts()
	u.BBQFollow = counts()
	u.BBQBlack = counts()

	// real-time behaviour
	u.PosVideos = counts()
	u.NegVideos = counts()
	u.LikeVideos = counts()
	u.LikeTags = scores()
	u.LikeTagIDs = counts()
	u.LikeUPs = counts()
	u.PosTagIDs = counts()
	u.NegTagIDs = counts()
	u.PosTags = scores()
	u.NegTags = scores()
	return
}
// LoadUserProfile builds a user profile by issuing all redis reads for the
// user in parallel and merging whatever comes back.
//
// Which reads are scheduled depends on the identity that is available:
//   - mid or buvid present: the last served pages of the main and ups flows;
//   - mid present: bili profile, bbq profile and the like/follow/play hashes
//     of today and yesterday;
//   - only buvid present: the device profile and the device play hashes of
//     today and yesterday.
//
// Each task is awaited for at most 100ms. A task that fails is logged and
// skipped, so the returned profile may be partially filled.
//
// NOTE(review): err ends up holding the outcome of whichever task was
// iterated last (map iteration order is unspecified), with redis.ErrNil mapped
// to nil. That pre-existing contract is kept unchanged here; confirm with the
// callers before tightening it.
func (d *Dao) LoadUserProfile(c context.Context, mid int64, buvid string) (userProfile *model.UserProfile, err error) {
	tasks := make(map[string]workpool.Task)
	userProfile = d.InitUserProfile(c, mid, buvid)

	// addNamed schedules a redis command through the named task constructor.
	addNamed := func(taskName, command, key string) {
		tasks[taskName] = parallel.NewRedisTaskWithName(&c, taskName, d.redis, command, key)
	}
	// addHash schedules an HGETALL through the unnamed task constructor.
	addHash := func(taskName, key string) {
		tasks[taskName] = parallel.NewRedisTask(&c, d.redis, "HGETALL", key)
	}

	// last pages: use the same key helpers as the store side so that load
	// and store can never disagree on which key belongs to an identity.
	if mid != 0 || buvid != "" {
		addNamed(TaskLastPage, "GET", d.LastPageRedisKey(mid, buvid))
		addNamed(TaskLastUpsPage, "GET", d.LastUpsPageRedisKey(mid, buvid))
	}

	// Read the clock once so today and yesterday are always consecutive days,
	// even when the request straddles midnight.
	now := time.Now()
	today := now.Format("20060102")
	yesterday := now.AddDate(0, 0, -1).Format("20060102")

	switch {
	case mid != 0:
		// logged-in user: profiles
		addNamed(TaskBiliUserProfile, "HGETALL", fmt.Sprintf(_BiliUserProfileKey, mid))
		addNamed(TaskBBQUserProfile, "HGETALL", fmt.Sprintf(_BBQUserProfileKey, mid))
		// logged-in user: real-time like / follow / play
		addHash(TaskUserLike, fmt.Sprintf(_RealTimeUserLike, mid, today))
		addHash(TaskUserLikeYesterday, fmt.Sprintf(_RealTimeUserLike, mid, yesterday))
		addHash(TaskUserFollow, fmt.Sprintf(_RealTimeUserFollow, mid, today))
		addHash(TaskUserFollowYesterday, fmt.Sprintf(_RealTimeUserFollow, mid, yesterday))
		addHash(TaskUserPlay, fmt.Sprintf(_RealTimeUserPlayMID, mid, today))
		addHash(TaskUserPlayYesterday, fmt.Sprintf(_RealTimeUserPlayMID, mid, yesterday))
	case buvid != "":
		// anonymous device: profile and real-time play
		addHash(TaskBBQDeviceProfile, fmt.Sprintf(_BBQDeviceProfileKey, buvid))
		addHash(TaskDevicePlay, fmt.Sprintf(_RealTimeUserPlayBuvID, buvid, today))
		addHash(TaskDevicePlayYesterday, fmt.Sprintf(_RealTimeUserPlayBuvID, buvid, yesterday))
	}

	ftTasks := d.parallelTask2(tasks)
	for name, task := range ftTasks {
		var raw *[]byte
		raw, err = task.Wait(100 * time.Millisecond)
		if err != nil && err != redis.ErrNil {
			log.Errorv(c, log.KV("REDIS_GET_ERROR", err))
			continue
		}
		if raw == nil {
			continue
		}
		switch name {
		case TaskLastPage:
			setLastPage(raw, userProfile, "lastRecords")
		case TaskLastUpsPage:
			setLastPage(raw, userProfile, "lastUpsRecords")
		case TaskBiliUserProfile:
			setUserProfileBili(raw, err, userProfile)
		case TaskBBQDeviceProfile, TaskBBQUserProfile:
			setUserProfileBBQ(raw, err, userProfile)
		case TaskUserLikeYesterday, TaskUserLike:
			setUserLikeInfo(raw, err, userProfile)
		case TaskUserFollowYesterday, TaskUserFollow:
			setUserFollowInfo(raw, err, userProfile)
		case TaskUserPlayYesterday, TaskDevicePlayYesterday, TaskUserPlay, TaskDevicePlay:
			setUserPlayInfo(raw, err, userProfile)
		}
	}
	if err == redis.ErrNil {
		err = nil
	}
	return
}
// setUserProfileBBQ decodes an HGETALL reply holding a bbq profile and merges
// it into u.
//
// The "zone" and "tag" fields are comma separated lists whose items are
// recorded with weight 1.0 in u.BBQZones and u.BBQTags. The "up" field is a
// comma separated list of uploader mids recorded in u.BBQPrefUps; items that
// are not valid integers are skipped rather than being recorded as mid 0.
//
// A decoding failure other than redis.ErrNil is logged and returned.
func setUserProfileBBQ(bytes *[]byte, inErr error, u *model.UserProfile) (err error) {
	var res map[string]string
	// NOTE(review): bytes is reinterpreted as a pointer to the raw redis reply
	// (interface{}); presumably the task layer returns the reply under this
	// pointer type — confirm against the parallel package.
	if res, err = redis.StringMap(*(*interface{})(unsafe.Pointer(bytes)), inErr); err != nil {
		if err == redis.ErrNil {
			err = nil
		} else {
			log.Error("redis HGETALL failed error(%v)", err)
		}
	}
	for key, value := range res {
		switch key {
		case _Zone:
			for _, zone2 := range strings.Split(value, ",") {
				u.BBQZones[zone2] = 1.0
			}
		case _Tag:
			for _, tag := range strings.Split(value, ",") {
				u.BBQTags[tag] = 1.0
			}
		case _Up:
			for _, upStr := range strings.Split(value, ",") {
				upMID, parseErr := strconv.ParseInt(upStr, 10, 64)
				if parseErr != nil {
					// An empty or corrupt item must not show up as mid 0.
					continue
				}
				u.BBQPrefUps[upMID] = 1
			}
		}
	}
	return
}
// setUserProfileBili decodes an HGETALL reply holding a bili profile and
// merges it into u.
//
// The "zone" and "tag" fields are comma separated lists whose items are
// recorded with weight 1.0 in u.Zones2 and u.BiliTags. The "up" field is a
// comma separated list of uploader mids recorded in u.FollowUps; items that
// are not valid integers are skipped rather than being recorded as mid 0.
//
// A decoding failure other than redis.ErrNil is logged; nothing is returned.
func setUserProfileBili(bytes *[]byte, inErr error, u *model.UserProfile) {
	// NOTE(review): bytes is reinterpreted as a pointer to the raw redis reply
	// (interface{}); presumably the task layer returns the reply under this
	// pointer type — confirm against the parallel package.
	res, err := redis.StringMap(*(*interface{})(unsafe.Pointer(bytes)), inErr)
	if err != nil && err != redis.ErrNil {
		log.Error("redis HGETALL failed error(%v)", err)
	}
	for key, value := range res {
		switch key {
		case _Zone:
			for _, zone2 := range strings.Split(value, ",") {
				u.Zones2[zone2] = 1.0
			}
		case _Tag:
			for _, tag := range strings.Split(value, ",") {
				u.BiliTags[tag] = 1.0
			}
		case _Up:
			for _, upStr := range strings.Split(value, ",") {
				upMID, parseErr := strconv.ParseInt(upStr, 10, 64)
				if parseErr != nil {
					// An empty or corrupt item must not show up as mid 0.
					continue
				}
				u.FollowUps[upMID] = 1
			}
		}
	}
}
// setUserLikeInfo decodes an HGETALL reply of real-time like events and
// records each liked video in u.LikeVideos (svid -> ctime).
//
// Every hash value is a JSON object. A value that fails to parse is logged and
// skipped. Each value is decoded into a fresh struct so that a failed or
// partial entry can never inherit the svid/ctime of another entry.
func setUserLikeInfo(bytes *[]byte, inErr error, u *model.UserProfile) {
	type likeEvent struct {
		SVID  int64  `json:"svid"`
		CTime int64  `json:"ctime"`
		BuvID string `json:"buvid"`
	}
	// NOTE(review): bytes is reinterpreted as a pointer to the raw redis reply
	// (interface{}); presumably the task layer returns the reply under this
	// pointer type — confirm against the parallel package.
	res, err := redis.StringMap(*(*interface{})(unsafe.Pointer(bytes)), inErr)
	if err != nil && err != redis.ErrNil {
		log.Error("redis HGETALL failed error(%v)", err)
	}
	for _, value := range res {
		var event likeEvent
		if err = jsoniter.UnmarshalFromString(value, &event); err != nil {
			log.Error("json parse error: %v", err)
			continue
		}
		u.LikeVideos[event.SVID] = event.CTime
	}
}
// setUserFollowInfo decodes an HGETALL reply of real-time follow events and
// records each followed uploader in u.BBQFollowAction (up mid -> ctime).
//
// Every hash value is a JSON object. A value that fails to parse is logged and
// skipped. Each value is decoded into a fresh struct so that a failed or
// partial entry can never inherit the upid/ctime of another entry.
func setUserFollowInfo(bytes *[]byte, inErr error, u *model.UserProfile) {
	type followEvent struct {
		UpID  int64 `json:"upid"`
		CTime int64 `json:"ctime"`
		MID   int64 `json:"mid"`
	}
	// NOTE(review): bytes is reinterpreted as a pointer to the raw redis reply
	// (interface{}); presumably the task layer returns the reply under this
	// pointer type — confirm against the parallel package.
	res, err := redis.StringMap(*(*interface{})(unsafe.Pointer(bytes)), inErr)
	if err != nil && err != redis.ErrNil {
		log.Error("user real time follow redis HGETALL failed error(%v)", err)
	}
	for _, value := range res {
		var event followEvent
		if err = jsoniter.UnmarshalFromString(value, &event); err != nil {
			log.Error("json parse error: %v", err)
			continue
		}
		u.BBQFollowAction[event.UpID] = event.CTime
	}
}
// setUserPlayInfo decodes an HGETALL reply of real-time play events and folds
// them into u.
//
// Every successfully parsed event appends its svid to u.ViewVideos. It is
// additionally recorded in u.PosVideos (svid -> ctime) when ViewDuration is at
// least 15000, or when Duration is at least 5000 and at least 95% of it was
// watched; and in u.NegVideos when ViewDuration is at most 500.
// NOTE(review): the thresholds look like milliseconds — confirm with the
// producer of these hashes.
//
// Each value is decoded into a fresh struct: hash values are visited in an
// unspecified order, and reusing one struct would let an event that omits a
// field inherit that field from whichever event happened to be decoded before.
func setUserPlayInfo(bytes *[]byte, inErr error, u *model.UserProfile) {
	type playEvent struct {
		Svid         int64 `json:"svid"`
		CTime        int64 `json:"ctime"`
		Duration     int64 `json:"duration"`
		ViewDuration int64 `json:"viewDuration"`
	}
	// NOTE(review): bytes is reinterpreted as a pointer to the raw redis reply
	// (interface{}); presumably the task layer returns the reply under this
	// pointer type — confirm against the parallel package.
	res, err := redis.StringMap(*(*interface{})(unsafe.Pointer(bytes)), inErr)
	if err != nil && err != redis.ErrNil {
		log.Error("redis HGETALL failed error(%v)", err)
	}
	for _, value := range res {
		var event playEvent
		if err = jsoniter.UnmarshalFromString(value, &event); err != nil {
			log.Error("json parse error: %v", err)
			continue
		}
		u.ViewVideos = append(u.ViewVideos, event.Svid)
		if event.ViewDuration >= 15000 || (event.Duration >= 5000 && float64(event.ViewDuration) >= 0.95*float64(event.Duration)) {
			u.PosVideos[event.Svid] = event.CTime
		}
		if event.ViewDuration <= 500 {
			u.NegVideos[event.Svid] = event.CTime
		}
	}
}
// setLastPage decodes the JSON encoded list of recently served records and
// stores it on u: in u.LastRecords when lastRecordType is "lastRecords",
// otherwise in u.LastUpsRecords.
//
// A nil or empty payload is ignored. When decoding fails the payload itself
// (not the pointer to it) is logged and u is left unchanged.
func setLastPage(bytes *[]byte, u *model.UserProfile, lastRecordType string) {
	if bytes == nil || len(*bytes) == 0 {
		return
	}
	var results []model.Record4Dup
	if err := jsoniter.Unmarshal(*bytes, &results); err != nil {
		log.Error("UnmarshalFromString value(%s) error(%v)", string(*bytes), err)
		return
	}
	if lastRecordType == "lastRecords" {
		u.LastRecords = results
		return
	}
	u.LastUpsRecords = results
}

View File

@@ -0,0 +1,34 @@
# Bazel build definitions for go-common/app/service/bbq/recsys/model.
# Targets in this package are publicly visible unless stated otherwise.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library built from the listed model sources; its only declared dependency
# is the vendored bloom filter package.
go_library(
name = "go_default_library",
srcs = [
"debug.go",
"field.go",
"model.go",
"user.go",
],
importpath = "go-common/app/service/bbq/recsys/model",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/Dai0522/go-hash/bloomfilter:go_default_library"],
)
# Every file of this package (private helper for all-srcs).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate of this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,48 @@
package model
// Names used as string keys for response level messages, ranking stages and
// ranking score/feature entries.
// NOTE(review): most of these are only declared in this view; their exact
// consumers live in other files — verify before renaming a value.
const (
// Response level message keys. The filter stage compares the
// ResponseDownGrade message against "1".
ResponseDownGrade = "ResponseDownGrade" // 1:recall service 2: redis
ResponseRecallCount = "ResponseRecallCount"
ResponseCount = "ResponseCount"
ResponseRecallStat = "ResponseRecallStat"
RankModelName = "RankModelName"
RankModelScore = "RankModelScore"
QueryID = "QueryID"
ScoreMessage = "scoreMessage"
FeatureString = "feature"
// Stage order markers.
OrderRecall = "Order01Recall"
OrderRanker = "Order02Ranker"
OrderWeakIntervention = "Order03WeakIntervention"
OrderFinal = "Order04Final"
OrderPostProcess = "OrderPostProcess"
//rank score
ScoreTotalScore = "TotalScore"
ScoreBiliZone = "ScoreBiliZone"
ScoreBiliTag = "ScoreBiliTag"
ScoreLikeTag = "scoreLikeTag"
ScorePosTag = "scorePosTag"
ScoreNegTag = "scoreNegTag"
ScoreMatchTitle = "scoreMatchTitle"
ScoreFollowUP = "scoreFollowUp"
ScoreOperationLevel = "scoreOperationLevel"
BiliPlayNum = "BiliPlayNum"
BiliFavRatio = "BiliFavRatio"
BiliLikeRatio = "BiliLikeRatio"
BiliShareRatio = "BiliShareRatio"
BiliCoinRatio = "BiliCoinRatio"
BiliReplyRatio = "BiliReplyRatio"
ScoreRelevant = "scoreRelevant"
ScoreRetrieveTag = "scoreRetrieveTag"
//recall tag
)

View File

@@ -0,0 +1,84 @@
package model
// Record field names: the string keys under which per-record attributes are
// stored in a record's Map (for example UperMid, ScatterTag, Duration and
// RecallClasses are read or written that way by the dao and filter code).
const (
ID = "id"
SVID = "svid"
AVID = "avid"
CID = "cid"
Title = "title"
Content = "content"
Duration = "duration"
TID = "tid"
SubTid = "sub_tid"
ScatterTag = "scatter_tag"
Tags = "tags"
TagsName = "tags_name"
TagsID = "tags_id"
TagsType = "tags_type"
ZoneName = "ZoneName"
ZoneID = "ZoneID"
PlayHive = "play_hive"
FavHive = "fav_hive"
LikesHive = "likes_hive"
ShareHive = "share_hive"
ReplyHive = "reply_hive"
DanmuHive = "danmu_hive"
CoinHive = "coin_hive"
PlayWeekBili = "play_week_bili"
FavWeekBili = "fav_week_bili"
LikesWeekBili = "likes_week_bili"
ShareWeekBili = "share_week_bili"
ReplyWeekBili = "reply_week_bili"
DanmuWeekBili = "danmu_week_bili"
CoinWeekBili = "coin_week_bili"
PlayDayBili = "play_day_bili"
FavDayBili = "fav_day_bili"
LikesDayBili = "likes_day_bili"
ShareDayBili = "share_day_bili"
ReplyDayBili = "reply_day_bili"
DanmuDayBili = "danmu_day_bili"
CoinDayBili = "coin_day_bili"
PlayMonthTotal = "play_month_total"
PlayMonthFinish = "play_month_finish"
PlayMonth = "play_month"
FavMonth = "fav_month"
LikesMonth = "likes_month"
ShareMonth = "share_month"
ReplyMonth = "reply_month"
DanmuMonth = "danmu_month"
PlayWeekTotal = "play_week_total"
PlayWeekFinish = "play_week_finish"
PlayWeek = "play_week"
LikesWeek = "likes_week"
ShareWeek = "share_week"
ReplyWeek = "reply_week"
DanmuWeek = "danmu_week"
PlayDayTotal = "play_day_total"
PlayDayFinish = "play_day_finish"
PlayDay = "play_day"
LikesDay = "likes_day"
ShareDay = "share_day"
ReplyDay = "reply_day"
DanmuDay = "danmu_day"
// UperMid is the key of the uploader's mid (stored as a string).
UperMid = "mid"
PubTime = "PubTime"
State = "state"
Retriever = "retriever"
RecallClasses = "RecallClasses"
RecallTags = "RecallTags"
RecallOrder = "RecallOrder"
RecallScore = "RecallScore"
SourceTimeToNow = "SourceTimeToNow"
)

View File

@@ -0,0 +1,36 @@
package model
// Video review states.
const (
//State0 video has not been reviewed
State0 = 0
//State1 video passed the safety review
State1 = 1
//State2 video is waiting for the cold-start re-check
State2 = 2
//State3 video passed the re-check and may be released
State3 = 3
//State4 video is rated high quality
State4 = 4
//State5 video is a featured selection
State5 = 5
)
//Record4Dup is the compact form of a served record that is kept in the
//"last records" history: the video id, the uploader mid (as a string) and the
//scatter tag. It is serialised to JSON with the keys svid, mid and tag.
type Record4Dup struct {
SVID int64 `json:"svid"`
MID string `json:"mid"`
Tag string `json:"tag"`
}
//Tag describes a tag by its name, type and numeric id.
type Tag struct {
TagName string
TagType int64
TagID int64
}

View File

@@ -0,0 +1,65 @@
package model
import (
"github.com/Dai0522/go-hash/bloomfilter"
)
//Tuple pairs a timestamp with a count.
type Tuple struct {
Timestamp int64
Count int64
}
// UserProfile is the user portrait used by the recommendation pipeline. It
// combines the historical profile with data derived from real-time logs.
//
// Every field is omitted from JSON output when empty.
type UserProfile struct {
	Mid        int64   `json:"Mid,omitempty"`
	Buvid      string  `json:"Buvid,omitempty"`
	Name       string  `json:"Name,omitempty"`
	Gender     int8    `json:"Gender,omitempty"`
	ViewVideos []int64 `json:"ViewVideos,omitempty"`

	// bbq user profile
	// key: up mid, value: timestamp
	BBQFollowAction map[int64]int64 `json:"BBQFollowAction,omitempty"`
	// key: up mid, value: 1
	BBQFollow  map[int64]int64    `json:"BBQFollow,omitempty"`
	BBQBlack   map[int64]int64    `json:"BBQBlack,omitempty"`
	BBQTags    map[string]float64 `json:"BBQTags,omitempty"`
	BBQZones   map[string]float64 `json:"BBQZones,omitempty"`
	BBQPrefUps map[int64]int64    `json:"BBQPrefUps,omitempty"`

	// bili user profile
	BiliTags  map[string]float64 `json:"BiliTags,omitempty"`
	Zones1    map[string]float64 `json:"Zones1,omitempty"`
	Zones2    map[string]float64 `json:"Zones2,omitempty"`
	FollowUps map[int64]int64    `json:"FollowUps,omitempty"`

	// bbq real-time data
	// key: SVID, value: timestamp
	PosVideos  map[int64]int64 `json:"PosVideos,omitempty"`
	NegVideos  map[int64]int64 `json:"NegVideos,omitempty"`
	LikeVideos map[int64]int64 `json:"LikeVideos,omitempty"`
	// key: tagID, value: count
	LikeTagIDs map[int64]int64 `json:"LikeTagIDs,omitempty"`
	PosTagIDs  map[int64]int64 `json:"PosTagIDs,omitempty"`
	NegTagIDs  map[int64]int64 `json:"NegTagIDs,omitempty"`
	// key: UP MID, value: timestamp
	LikeUPs map[int64]int64 `json:"LikeUPs,omitempty"`

	// for old retrieve function
	LikeTags map[string]float64 `json:"LikeTags,omitempty"`
	PosTags  map[string]float64 `json:"PosTags,omitempty"`
	NegTags  map[string]float64 `json:"NegTags,omitempty"`

	// DedupVideos holds video ids used for de-duplication by ID.
	DedupVideos []int64      `json:"DedupVideos,omitempty"`
	LastRecords []Record4Dup `json:"LastRecords,omitempty"`
	// LastUpsRecords needs its own JSON name: sharing "LastRecords" with the
	// field above makes the two fields conflict (encoding/json then drops
	// both of them from the output).
	LastUpsRecords []Record4Dup `json:"LastUpsRecords,omitempty"`

	// BloomFilter is used for de-duplication by SVID.
	BloomFilter *bloomfilter.BloomFilter `json:"BloomFilter,omitempty"`
}

View File

@@ -0,0 +1,34 @@
# Bazel build definitions for go-common/app/service/bbq/recsys/server/grpc.
# Targets in this package are publicly visible unless stated otherwise.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library built from server.go; depends on the recsys gRPC API, the recsys
# service package, warden and grpc.
go_library(
name = "go_default_library",
srcs = ["server.go"],
importpath = "go-common/app/service/bbq/recsys/server/grpc",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/service:go_default_library",
"//library/net/rpc/warden:go_default_library",
"@org_golang_google_grpc//:go_default_library",
],
)
# Every file of this package (private helper for all-srcs).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate of this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,33 @@
package grpc
import (
"context"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/service"
"go-common/library/net/rpc/warden"
"google.golang.org/grpc"
)
// New creates the gRPC server for the recsys service, registers srv on it,
// installs the unary middleware and starts it. It panics when the server
// cannot be started.
func New(srv *service.Service) *warden.Server {
	// An explicit configuration was used previously:
	// conf := &warden.ServerConfig{Addr: "0.0.0.0:9009"}
	// s := warden.NewServer(conf)
	server := warden.NewServer(nil)
	rpc.RegisterRecsysServer(server.Server(), srv)
	server.Use(middleware())
	if _, err := server.Start(); err != nil {
		panic("run server failed!" + err.Error())
	}
	return server
}
// middleware returns a unary interceptor that simply forwards the call to the
// next handler in the chain and hands back its result unchanged.
func middleware() grpc.UnaryServerInterceptor {
	passThrough := func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
		// call chain
		return handler(ctx, req)
	}
	return passThrough
}

View File

@@ -0,0 +1,39 @@
# Bazel build definitions for go-common/app/service/bbq/recsys/server/http.
# Targets in this package are publicly visible unless stated otherwise.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library built from http.go and recsys.go; depends on the recsys gRPC API,
# conf and service packages plus the log, blademaster and verify libraries.
go_library(
name = "go_default_library",
srcs = [
"http.go",
"recsys.go",
],
importpath = "go-common/app/service/bbq/recsys/server/http",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/conf:go_default_library",
"//app/service/bbq/recsys/service:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
],
)
# Every file of this package (private helper for all-srcs).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate of this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,56 @@
package http
import (
"net/http"
"go-common/app/service/bbq/recsys/conf"
"go-common/app/service/bbq/recsys/service"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/middleware/verify"
)
// Package level handles shared by the HTTP handlers; both are assigned by Init.
var (
// srv is the recsys service the handlers delegate to.
srv *service.Service
// vfy is the verify middleware built from the configuration.
vfy *verify.Verify
)
// Init wires the HTTP layer: it stores the service and the verify middleware
// in the package level handles, registers the routes on a default blademaster
// server and starts it. A start failure is logged and then raised as a panic.
func Init(c *conf.Config, s *service.Service) {
	srv, vfy = s, verify.New(c.Verify)
	engine := bm.DefaultServer(c.BM)
	route(engine)
	err := engine.Start()
	if err == nil {
		return
	}
	log.Error("bm Start error(%v)", err)
	panic(err)
}
// route registers the ping/register endpoints and the admin endpoints that
// live under /bbq/admin/recsys: two GET start endpoints (one of them behind
// the verify middleware) and three POST check endpoints.
func route(e *bm.Engine) {
	const adminPrefix = "/bbq/admin/recsys"

	e.Ping(ping)
	e.Register(register)

	startGroup := e.Group(adminPrefix)
	startGroup.GET("/verify/start", vfy.Verify, start)
	startGroup.GET("/start", start)

	checkGroup := e.Group(adminPrefix)
	checkGroup.POST("/check/rec/message", reqRecsys)
	checkGroup.POST("/check/related/message", relatedRecsys)
	checkGroup.POST("/check/ups/message", upsRecsys)
}
// ping answers the health check: when the service ping fails the error is
// logged and the request is aborted with 503 Service Unavailable.
func ping(c *bm.Context) {
	err := srv.Ping(c)
	if err == nil {
		return
	}
	log.Error("ping error(%v)", err)
	c.AbortWithStatus(http.StatusServiceUnavailable)
}
// register answers the register endpoint with an empty JSON object.
func register(c *bm.Context) {
	empty := make(map[string]interface{})
	c.JSON(empty, nil)
}

View File

@@ -0,0 +1,51 @@
package http
import (
"encoding/json"
"go-common/app/service/bbq/recsys/api/grpc/v1"
bm "go-common/library/net/http/blademaster"
"io/ioutil"
)
// start is an example handler: it binds the request into a RecsysRequest and
// replies with the result of the main recommendation flow. Nothing further is
// written here when binding fails.
func start(c *bm.Context) {
	arg := &v1.RecsysRequest{}
	if c.Bind(arg) != nil {
		return
	}
	c.JSON(srv.Start(c, arg))
}
// reqRecsys is the debug entry of the main recommendation flow: the request is
// filled from the JSON body first, then passed through Bind, and the result of
// srv.Start is written as the reply.
//
// NOTE(review): the read and JSON decode errors are ignored, and the body has
// already been consumed when Bind runs — confirm this is the intended
// behaviour for this debug endpoint.
func reqRecsys(c *bm.Context) {
	arg := new(v1.RecsysRequest)
	body, _ := ioutil.ReadAll(c.Request.Body)
	json.Unmarshal(body, &arg)
	if c.Bind(arg) != nil {
		return
	}
	c.JSON(srv.Start(c, arg))
}
// relatedRecsys is the debug entry of the related recommendation flow: the
// request is filled from the JSON body first, then passed through Bind, and
// the result of srv.RelatedRecService is written as the reply.
//
// NOTE(review): the read and JSON decode errors are ignored, and the body has
// already been consumed when Bind runs — confirm this is the intended
// behaviour for this debug endpoint.
func relatedRecsys(c *bm.Context) {
	arg := new(v1.RecsysRequest)
	body, _ := ioutil.ReadAll(c.Request.Body)
	json.Unmarshal(body, &arg)
	if c.Bind(arg) != nil {
		return
	}
	c.JSON(srv.RelatedRecService(c, arg))
}
// upsRecsys is the debug entry of the ups recommendation flow: the request is
// filled from the JSON body first, then passed through Bind, and the result of
// srv.UpsRecService is written as the reply.
//
// NOTE(review): the read and JSON decode errors are ignored, and the body has
// already been consumed when Bind runs — confirm this is the intended
// behaviour for this debug endpoint.
func upsRecsys(c *bm.Context) {
	arg := new(v1.RecsysRequest)
	body, _ := ioutil.ReadAll(c.Request.Body)
	json.Unmarshal(body, &arg)
	if c.Bind(arg) != nil {
		return
	}
	c.JSON(srv.UpsRecService(c, arg))
}

View File

@@ -0,0 +1,60 @@
# Bazel build definitions for go-common/app/service/bbq/recsys/service.
# Targets in this package are publicly visible unless stated otherwise.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library built from the listed service sources.
go_library(
name = "go_default_library",
srcs = [
"abtest.go",
"filter.go",
"preprocess.go",
"rank.go",
"recsys.go",
"relatedrec.go",
"service.go",
"store.go",
"upsrec.go",
],
importpath = "go-common/app/service/bbq/recsys/service",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/conf:go_default_library",
"//app/service/bbq/recsys/dao:go_default_library",
"//app/service/bbq/recsys/model:go_default_library",
"//app/service/bbq/recsys/service/postprocess:go_default_library",
"//app/service/bbq/recsys/service/rank:go_default_library",
"//app/service/bbq/recsys/service/retrieve:go_default_library",
"//app/service/bbq/recsys/service/util:go_default_library",
"//library/log:go_default_library",
"//library/log/infoc:go_default_library",
"//library/net/trace:go_default_library",
"//library/stat/prom:go_default_library",
"//vendor/github.com/go-ego/murmur:go_default_library",
"//vendor/github.com/json-iterator/go:go_default_library",
],
)
# Every file of this package (private helper for all-srcs).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate of this package's sources and those of its sub-packages.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys/service/postprocess:all-srcs",
"//app/service/bbq/recsys/service/rank:all-srcs",
"//app/service/bbq/recsys/service/retrieve:all-srcs",
"//app/service/bbq/recsys/service/util:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,45 @@
package service
import (
"fmt"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"github.com/go-ego/murmur"
)
// Experiment group labels written to the request's Abtest field by DoABTest.
const (
//ABTestA is the label of group A (buckets below 50).
ABTestA = "bbq-rec-A"
//ABTestB is the label of group B (buckets 50 and above).
ABTestB = "bbq-rec-B"
)
// DoABTest assigns the request to an experiment group and stores the group
// label in request.Abtest.
//
// The bucket is MID modulo 100 for a positive MID, otherwise the murmur hash
// of BUVID modulo 100 when a BUVID is present. Buckets below 50 map to
// ABTestA, all others to ABTestB. When neither identity is available the
// field is left alone. Two white-listed MIDs are pinned to a fixed group.
func (s *Service) DoABTest(request *rpc.RecsysRequest) {
	bucket := -1
	switch {
	case request.MID > 0:
		bucket = int(request.MID % 100)
	case len(request.BUVID) > 0:
		hash := murmur.Sum32(request.BUVID)
		level0 := hash % 100
		level1 := hash / 100 % 100
		level2 := hash / 10000 % 100
		bucket = int(level0)
		// This layered label is replaced by the group label right below.
		request.Abtest = fmt.Sprintf("Rank:%d;Recall:%d;Rule:%d", level0, level1, level2)
	}

	switch {
	case bucket == -1:
		// no identity: nothing to assign
	case bucket < 50:
		request.Abtest = ABTestA
	default:
		request.Abtest = ABTestB
	}

	// white list
	switch request.MID {
	case 5829468:
		request.Abtest = ABTestA
	case 208259:
		request.Abtest = ABTestB
	}
}

View File

@@ -0,0 +1,222 @@
package service
import (
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/retrieve"
"go-common/library/log"
"strconv"
)
//FilterManager holds one ordered chain of filter nodes per recommendation
//flow: the main flow, the related flow and the ups flow.
type FilterManager struct {
filterNodes []FilterNode
relatedFilterNodes []FilterNode
upsFilterNodes []FilterNode
}
//FilterNode is one step of a filter chain. doFilter receives the request, the
//response and the user profile; the implementations in this file replace
//response.List with the records that survive the step.
type FilterNode interface {
doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile)
}
// NewFilterManager builds the three filter chains:
//   - main flow:    de-duplication, bloom filter, blacklist, duration;
//   - related flow: de-duplication, related filter;
//   - ups flow:     de-duplication, blacklist, follows.
//
// Node instances are shared between the chains that use them.
func NewFilterManager() (m *FilterManager) {
	dedup := &DefaultFilterNode{}
	bloom := &BloomFilterNode{}
	duration := &DurationFilterNode{}
	follows := &FollowsFilterNode{}
	black := &BlackFilterNode{}
	related := &RelatedFilterNode{}

	m = &FilterManager{
		filterNodes:        []FilterNode{dedup, bloom, black, duration},
		relatedFilterNodes: []FilterNode{dedup, related},
		upsFilterNodes:     []FilterNode{dedup, black, follows},
	}
	return
}
// filter runs the main flow filter chain over the response, in order.
func (m *FilterManager) filter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	for i := range m.filterNodes {
		m.filterNodes[i].doFilter(req, response, profile)
	}
}
// relatedFilter runs the related flow filter chain over the response, in order.
func (m *FilterManager) relatedFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	for i := range m.relatedFilterNodes {
		m.relatedFilterNodes[i].doFilter(req, response, profile)
	}
}
// upsFilter runs the ups flow filter chain over the response, in order.
func (m *FilterManager) upsFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	for i := range m.upsFilterNodes {
		m.upsFilterNodes[i].doFilter(req, response, profile)
	}
}
// DefaultFilterNode removes duplicate records from the response.
type DefaultFilterNode struct {
	FilterNode
}

// doFilter keeps the first record of every svid and drops later repeats,
// preserving the original order. The list sizes before and after are logged
// when the request has its debug flag set.
func (f *DefaultFilterNode) doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	if req.DebugFlag {
		log.Info("Default Filter Node before records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
	seen := make(map[int64]int64)
	kept := make([]*rpc.RecsysRecord, 0)
	for _, record := range response.List {
		if _, duplicate := seen[record.Svid]; duplicate {
			continue
		}
		seen[record.Svid] = 1
		kept = append(kept, record)
	}
	response.List = kept
	if req.DebugFlag {
		log.Info("Default Filter Node after records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
}
// BloomFilterNode drops records whose svid the user's bloom filter may
// already contain.
type BloomFilterNode struct {
	FilterNode
}

// doFilter leaves the response untouched when it is marked as downgraded
// (ResponseDownGrade message equal to "1") or when the profile carries no
// bloom filter. Otherwise only the records whose svid is definitely absent
// from the bloom filter are kept.
func (f *BloomFilterNode) doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	debug := req.DebugFlag
	if debug {
		log.Info("Bloom Filter Node before records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
	if response.Message[model.ResponseDownGrade] == "1" {
		if debug {
			log.Info("Do not do Bloom Filter in down grade state, traceID is (%v)", req.TraceID)
		}
		return
	}
	bf := profile.BloomFilter
	if bf == nil {
		return
	}
	unseen := make([]*rpc.RecsysRecord, 0)
	for _, record := range response.List {
		if bf.MightContainUint64(uint64(record.Svid)) {
			continue
		}
		unseen = append(unseen, record)
	}
	response.List = unseen
	if debug {
		log.Info("Bloom Filter Node after records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
}
// RelatedFilterNode removes the source video and its uploader's other videos
// from a related recommendation response.
type RelatedFilterNode struct {
	FilterNode
}

// doFilter drops every record that is the requested video itself or whose
// uploader mid equals the source uploader recorded in the response messages.
func (f *RelatedFilterNode) doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	sourceUp := response.Message[retrieve.SourceUpMID]
	kept := make([]*rpc.RecsysRecord, 0)
	for _, record := range response.List {
		if record.Svid == req.SVID || record.Map[model.UperMid] == sourceUp {
			continue
		}
		kept = append(kept, record)
	}
	response.List = kept
}
// FollowsFilterNode removes uploaders the user already follows.
type FollowsFilterNode struct {
	FilterNode
}

// doFilter drops every record whose uploader mid is present in
// profile.BBQFollow. An uploader mid that cannot be parsed is treated as 0.
// The list sizes before and after are logged when the request has its debug
// flag set.
func (f *FollowsFilterNode) doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	if req.DebugFlag {
		log.Info("Follow Filter Node before records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
	records := make([]*rpc.RecsysRecord, 0)
	for _, record := range response.List {
		upMID, _ := strconv.ParseInt(record.Map[model.UperMid], 10, 64)
		if _, ok := profile.BBQFollow[upMID]; ok {
			continue
		}
		records = append(records, record)
	}
	response.List = records
	if req.DebugFlag {
		// This line reports the size after filtering; it used to say "before".
		log.Info("Follow Filter Node after records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
}
// BlackFilterNode removes uploaders the user has blacklisted.
type BlackFilterNode struct {
	FilterNode
}

// doFilter drops every record whose uploader mid is present in
// profile.BBQBlack. An uploader mid that cannot be parsed is treated as 0.
// The list sizes before and after are logged when the request has its debug
// flag set.
func (f *BlackFilterNode) doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	if req.DebugFlag {
		log.Info("Black Filter Node before records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
	records := make([]*rpc.RecsysRecord, 0)
	for _, record := range response.List {
		upMID, _ := strconv.ParseInt(record.Map[model.UperMid], 10, 64)
		if _, ok := profile.BBQBlack[upMID]; ok {
			continue
		}
		records = append(records, record)
	}
	response.List = records
	if req.DebugFlag {
		// This line reports the size after filtering; it used to say "before".
		log.Info("Black Filter Node after records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
}
// DurationFilterNode removes videos whose duration is outside the accepted
// range.
type DurationFilterNode struct {
	FilterNode
}

// doFilter keeps only records whose duration field lies in [15, 60]. A
// duration that is missing or cannot be parsed counts as 0 and is therefore
// dropped. The list sizes before and after are logged when the request has
// its debug flag set.
// NOTE(review): the bounds are presumably seconds — confirm with the producer
// of the duration field.
func (f *DurationFilterNode) doFilter(req *rpc.RecsysRequest, response *rpc.RecsysResponse, profile *model.UserProfile) {
	if req.DebugFlag {
		log.Info("Duration Filter Node before records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
	records := make([]*rpc.RecsysRecord, 0)
	for _, record := range response.List {
		duration, _ := strconv.ParseInt(record.Map[model.Duration], 10, 64)
		if duration > 60 || duration < 15 {
			continue
		}
		records = append(records, record)
	}
	response.List = records
	if req.DebugFlag {
		// This line reports the size after filtering; it used to say "before".
		log.Info("Duration Filter Node after records size: (%v), traceID is (%v)", len(response.List), req.TraceID)
	}
}

View File

@@ -0,0 +1,41 @@
# Bazel build definitions for
# go-common/app/service/bbq/recsys/service/postprocess.
# Targets in this package are publicly visible unless stated otherwise.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library built from the listed post-processor sources; depends on the
# recsys gRPC API, model, retrieve and util packages.
go_library(
name = "go_default_library",
srcs = [
"downGradePostProcessor.go",
"postprocess.go",
"relevantInsertPostProcessor.go",
"scatterTagUpPostProcessor.go",
"selectInsertPostProcessor.go",
"weakInterventionPostProcessor.go",
],
importpath = "go-common/app/service/bbq/recsys/service/postprocess",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/model:go_default_library",
"//app/service/bbq/recsys/service/retrieve:go_default_library",
"//app/service/bbq/recsys/service/util:go_default_library",
],
)
# Every file of this package (private helper for all-srcs).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate of this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,28 @@
package postprocess
import (
"context"
"go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"math/rand"
)
// DownGradeProcessor is the post-processing step applied to responses that
// carry the model.ResponseDownGrade message. It embeds the Processor interface.
type DownGradeProcessor struct {
	Processor
}

// name returns the fixed identifier of this processor.
func (p *DownGradeProcessor) name() (name string) {
	return "DownGradeProcessor"
}
// process randomly shuffles response.List in place when response.Message
// contains the model.ResponseDownGrade key, and leaves the response untouched
// otherwise. It always returns a nil error.
func (p *DownGradeProcessor) process(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error) {
	if _, downgraded := response.Message[model.ResponseDownGrade]; !downgraded {
		return nil
	}
	list := response.List
	rand.Shuffle(len(list), func(a, b int) {
		list[a], list[b] = list[b], list[a]
	})
	return nil
}

View File

@@ -0,0 +1,130 @@
package postprocess
import (
"context"
"go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
)
// TagTypeZone2 is the tag-type value that marks a second-level zone tag.
const (
TagTypeZone2 = "2"
// Recommendation feed: scatter limits within one refresh, plus scattering
// across adjacent queues.
// Adjacent-queue scattering only cares about consecutive items, so only the last
// element of the previous queue and the first element of the current queue matter;
// no separate adjacent-queue scattering pass is needed.
_RecTagTotalLimit = 2
_RecTagAdjacencyLimit = 2
_RecUpTotalLimit = 1
_RecUpAdjacencyLimit = 1
_RecLastScreenCnt = 5
// Follow (up) recommendation: scatter limits within one refresh, plus scattering
// across adjacent queues.
// Adjacent-queue scattering only cares about consecutive items, so only the last
// element of the previous queue and the first element of the current queue matter;
// no separate adjacent-queue scattering pass is needed.
_UpsRecTagTotalLimit = 5
_UpsRecTagAdjacencyLimit = 2
_UpsRecUpTotalLimit = 1
_UpsRecUpAdjacencyLimit = 1
_UpsRecLastScreenCnt = 10
//// Adjacent-queue scattering; currently identical for the recommendation feed and follow recommendation.
//_AdjacentQueueTagTotalLimit = 0
//_AdjacentQueueTagAdjacencyLimit = 2
//_AdjacentQueueUpTotalLimit = 0
//_AdjacentQueueUpAdjacencyLimit = 1
)
// PostProcessor bundles the two post-processing pipelines of the service:
// ProcessRec and ProcessUpsRec. Both are plain functions of type process.
type PostProcessor struct {
name string
processors []Processor
ProcessRec process
ProcessUpsRec process
}
// process is the signature shared by a whole pipeline and by a single
// Processor step: it may mutate response in place and returns an error.
type process func(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error)
// Processor is one named post-processing step. Both methods are unexported,
// so only types in this package can implement it.
type Processor interface {
name() (name string)
process(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error)
}
// NewPostProcessor returns a PostProcessor named "post" whose ProcessRec and
// ProcessUpsRec pipelines are built by buildProcessRec and buildProcessUpsRec.
//
// The struct is allocated before the pipelines are built so that the build
// methods run on a real receiver. Previously they were called on the still-nil
// named result, which only worked because they never touch the receiver.
func NewPostProcessor() (p *PostProcessor) {
	p = &PostProcessor{
		name:       "post",
		processors: make([]Processor, 0),
	}
	p.ProcessRec = p.buildProcessRec()
	p.ProcessUpsRec = p.buildProcessUpsRec()
	return
}
// buildProcessRec assembles the pipeline for the main recommendation feed.
// The steps run in this order: weak intervention, downgrade shuffle, relevant
// insert, then tag/up scattering configured with the _Rec* limits. The
// returned function stops at, and returns, the first error a step reports.
func (p *PostProcessor) buildProcessRec() process {
	chain := []Processor{
		&WeakInterventionProcessor{},
		&DownGradeProcessor{},
		&RelevantInsertProcessor{},
		&ScatterTagUpProcessor{
			lastScreenCount:   _RecLastScreenCnt,
			tagTotalLimit:     _RecTagTotalLimit,
			tagAdjacencyLimit: _RecTagAdjacencyLimit,
			upTotalLimit:      _RecUpTotalLimit,
			upAdjacencyLimit:  _RecUpAdjacencyLimit,
			lastRecordsType:   "lastRecords",
		},
	}
	return func(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) error {
		for _, step := range chain {
			if stepErr := step.process(ctx, request, response, u); stepErr != nil {
				return stepErr
			}
		}
		return nil
	}
}
// buildProcessUpsRec assembles the pipeline for the "ups" recommendation.
// The steps run in this order: weak intervention, downgrade shuffle, then
// tag/up scattering configured with the _UpsRec* limits. The returned function
// stops at, and returns, the first error a step reports.
func (p *PostProcessor) buildProcessUpsRec() process {
	chain := []Processor{
		&WeakInterventionProcessor{},
		&DownGradeProcessor{},
		&ScatterTagUpProcessor{
			lastScreenCount:   _UpsRecLastScreenCnt,
			tagTotalLimit:     _UpsRecTagTotalLimit,
			tagAdjacencyLimit: _UpsRecTagAdjacencyLimit,
			upTotalLimit:      _UpsRecUpTotalLimit,
			upAdjacencyLimit:  _UpsRecUpAdjacencyLimit,
			lastRecordsType:   "lastUpsRecords",
		},
	}
	return func(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) error {
		for _, step := range chain {
			if stepErr := step.process(ctx, request, response, u); stepErr != nil {
				return stepErr
			}
		}
		return nil
	}
}

View File

@@ -0,0 +1,59 @@
package postprocess
import (
"context"
"go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/retrieve"
"strconv"
"strings"
)
// RelevantInsertProcessor is the post-processing step that promotes a record
// recalled from a recent like/follow action. It embeds the Processor interface.
type RelevantInsertProcessor struct {
	Processor
}

// name returns the fixed identifier of this processor. process also writes it
// into the promoted record's model.OrderPostProcess entry.
func (p *RelevantInsertProcessor) name() (name string) {
	return "RelevantInsert"
}
// process moves the first "recently relevant" record to the head of
// response.List (insertPosition 0) and tags it with this processor's name
// under model.OrderPostProcess.
//
// A record qualifies when it sits after insertPosition, one of its
// model.RecallClasses entries is LikeI2IRecall, LikeTagRecall, LikeUPRecall or
// FollowRecall, and its model.SourceTimeToNow value is at most 2*3600.
// NOTE(review): the threshold is presumably two hours in seconds — confirm.
//
// The records between insertPosition and the promoted record shift back by
// one, so the list keeps its length and every record. The previous append
// chain reused the backing array and silently overwrote the record at
// insertPosition, shortening the list by one. It always returns a nil error.
func (p *RelevantInsertProcessor) process(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error) {
	insertPosition := 0
	if insertPosition > len(response.List) {
		return
	}
	targetIndex := -1
search:
	// Records at or before insertPosition are already in place, so start after it.
	for index := insertPosition + 1; index < len(response.List); index++ {
		record := response.List[index]
		actionTimeToNow, ok := record.Map[model.SourceTimeToNow]
		if !ok {
			continue
		}
		// A malformed age is treated as "not recent" rather than as 0 seconds.
		timeInSeconds, parseErr := strconv.ParseInt(actionTimeToNow, 10, 64)
		if parseErr != nil || timeInSeconds > 2*3600 {
			continue
		}
		for _, recallClass := range strings.Split(record.Map[model.RecallClasses], "|") {
			switch recallClass {
			case retrieve.LikeI2IRecall, retrieve.LikeTagRecall, retrieve.LikeUPRecall, retrieve.FollowRecall:
				targetIndex = index
				break search
			}
		}
	}
	if targetIndex != -1 {
		record := response.List[targetIndex]
		record.Map[model.OrderPostProcess] = p.name()
		// Shift [insertPosition, targetIndex) one slot back, then drop the
		// promoted record into the freed slot. copy handles overlapping ranges.
		copy(response.List[insertPosition+1:targetIndex+1], response.List[insertPosition:targetIndex])
		response.List[insertPosition] = record
	}
	return
}

View File

@@ -0,0 +1,191 @@
package postprocess
import (
"context"
"go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"strings"
)
// ScatterTagUpProcessor is the post-processing step that spreads out records
// sharing the same scatter tag or the same uploader. It embeds the Processor
// interface; the limits are passed to isSatisfied, where a value <= 0 means
// "no limit".
type ScatterTagUpProcessor struct {
	Processor
	// lastScreenCount sizes the history window used for uploader counting.
	lastScreenCount int
	// tagTotalLimit caps how many placed records may share one scatter tag.
	tagTotalLimit int
	// tagAdjacencyLimit caps consecutive placed records with the same scatter tag.
	tagAdjacencyLimit int
	// upTotalLimit caps how many placed records may share one uploader.
	upTotalLimit int
	// upAdjacencyLimit caps consecutive placed records from the same uploader.
	upAdjacencyLimit int
	// lastRecordsType selects the history list: "lastRecords" uses
	// UserProfile.LastRecords, any other value uses UserProfile.LastUpsRecords.
	lastRecordsType string
	//adjacentQueueTagTotalLimit int
	//adjacentQueueTagAdjacencyLimit int
	//adjacentQueueTagUpTotalLimit int
	//adjacentQueueTagUpAdjacencyLimit int
}

// name returns the fixed identifier of this processor.
func (p *ScatterTagUpProcessor) name() (name string) {
	return "ScatterTagUp"
}
// process reorders response.List in place so that the first request.Limit
// entries respect the per-tag and per-uploader total and adjacency limits of
// this processor. No record is dropped: records that could not be placed, and
// records never examined, are appended after the placed ones.
//
// Steps:
//  1. Each record gets a model.ScatterTag entry derived from its second-level
//     zone tag (tag type TagTypeZone2).
//  2. Counters are seeded from the user's history. For "lastRecords" the
//     uploaders of the last 3*lastScreenCount history records are counted, so
//     that uploaders do not repeat across four refreshes (three previous
//     refreshes plus this one). The very last history record seeds the
//     adjacency state in both modes.
//  3. Records are scanned in order. A record that satisfies both the tag and
//     the uploader limits is placed; otherwise it is parked in an excluded
//     queue. After each successful placement the head of the excluded queue is
//     retried once before the scan continues.
//
// It always returns a nil error.
func (p *ScatterTagUpProcessor) process(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error) {
	if len(response.List) == 0 {
		return
	}
	for _, record := range response.List {
		tagNames := strings.Split(record.Map[model.TagsName], "|")
		tagTypes := strings.Split(record.Map[model.TagsType], "|")
		for i, tagType := range tagTypes {
			// Guard the parallel lookup: the name list may be shorter than the
			// type list, which used to panic with an index out of range.
			if tagType != TagTypeZone2 || i >= len(tagNames) {
				continue
			}
			tagName := tagNames[i]
			//FIXME BBQ HACK: several dance sub-zones are folded into one scatter tag.
			if tagName == "宅舞" || tagName == "MMD·3D" || tagName == "三次元舞蹈" || tagName == "舞蹈教程" {
				tagName = "舞蹈"
			}
			record.Map[model.ScatterTag] = tagName
		}
	}
	// tagCntMap / upCntMap: key -> number of records counted so far.
	// adjacentTag / adjacentUp: key of the current run -> length of that run.
	tagCntMap := make(map[string]int)
	upCntMap := make(map[string]int)
	adjacentTag := make(map[string]int)
	adjacentUp := make(map[string]int)
	var lastRecords []model.Record4Dup
	if p.lastRecordsType == "lastRecords" {
		lastRecords = u.LastRecords
		// Main feed: keep uploaders unique across four refreshes by counting the
		// previous three refreshes here; this refresh is counted while placing.
		lastRecordsLength := len(lastRecords)
		for k := max(0, lastRecordsLength-3*p.lastScreenCount); k < lastRecordsLength; k++ {
			upCntMap[lastRecords[k].MID]++
		}
	} else {
		lastRecords = u.LastUpsRecords
	}
	if len(lastRecords) > 0 {
		// Remember the last result of the previous page so adjacency carries over.
		lastRecord := lastRecords[len(lastRecords)-1]
		processAdjacentMap(adjacentTag, lastRecord.Tag)
		processAdjacentMap(adjacentUp, lastRecord.MID)
	}
	pageSize := int(request.Limit)
	originRecords := make([]*v1.RecsysRecord, len(response.List))
	copy(originRecords, response.List)
	excludedRecords := make([]*v1.RecsysRecord, 0)
	i := 0 // next unread index in originRecords
	j := 0 // next write index in response.List
	fromExcluded := false
	isSatisfiedOne := false
	var record *v1.RecsysRecord
	for i < len(originRecords) {
		if isSatisfiedOne && len(excludedRecords) > 0 {
			// The previous record was placed, so the constraints changed:
			// retry the oldest parked record first.
			record = excludedRecords[0]
			fromExcluded = true
		} else {
			record = originRecords[i]
			i++
			fromExcluded = false
		}
		isSatisfiedOne = false
		tagKey, hasTag := record.Map[model.ScatterTag]
		upKey, hasUp := record.Map[model.UperMid]
		// Both the tag and the uploader constraints must hold.
		if hasTag && hasUp &&
			isSatisfied(tagKey, p.tagTotalLimit, p.tagAdjacencyLimit, tagCntMap, adjacentTag) &&
			isSatisfied(upKey, p.upTotalLimit, p.upAdjacencyLimit, upCntMap, adjacentUp) {
			response.List[j] = record
			j++
			isSatisfiedOne = true
			// Adjacency state is only updated when a record is actually placed.
			processAdjacentMap(adjacentTag, tagKey)
			processAdjacentMap(adjacentUp, upKey)
			tagCntMap[tagKey]++
			upCntMap[upKey]++
			if fromExcluded {
				excludedRecords = excludedRecords[1:]
			}
		}
		if !isSatisfiedOne {
			record.Map[model.OrderPostProcess] = p.name()
			if !fromExcluded {
				excludedRecords = append(excludedRecords, record)
			}
		}
		// Stop as soon as one page worth of records has been placed.
		if j >= pageSize {
			break
		}
	}
	// Append the parked records, then the records that were never examined.
	for k := 0; k < len(excludedRecords); k++ {
		response.List[j] = excludedRecords[k]
		j++
	}
	for ; i < len(originRecords); i++ {
		response.List[j] = originRecords[i]
		j++
	}
	return
}
// isSatisfied reports whether a record keyed by findKey may be placed under
// both the total limit and the adjacency limit.
//
// An empty key never satisfies. A limit <= 0 means "no limit". A key missing
// from either map counts as 0, so it passes any positive limit.
func isSatisfied(findKey string, totalLimit int, adjacentLimit int, totalCntMap map[string]int, adjacentCntMap map[string]int) (isOk bool) {
	if findKey == "" {
		return false
	}
	// Map reads yield 0 for absent keys (and for nil maps), matching the
	// "not seen yet" case.
	seenTotal := totalCntMap[findKey]
	seenAdjacent := adjacentCntMap[findKey]
	withinTotal := totalLimit <= 0 || seenTotal < totalLimit
	withinAdjacent := adjacentLimit <= 0 || seenAdjacent < adjacentLimit
	return withinTotal && withinAdjacent
}
// processAdjacentMap records key as the most recent item of a run.
// If key is already in srcMap its counter is incremented. Otherwise every
// existing entry is removed and key starts a new run with counter 1.
// A nil map is ignored.
func processAdjacentMap(srcMap map[string]int, key string) {
	if srcMap == nil {
		return
	}
	if run, seen := srcMap[key]; seen {
		srcMap[key] = run + 1
		return
	}
	// A different key ends the previous run: forget it entirely.
	for other := range srcMap {
		delete(srcMap, other)
	}
	srcMap[key] = 1
}
// max returns the larger of x and y.
func max(x, y int) int {
	if y > x {
		return y
	}
	return x
}

View File

@@ -0,0 +1,49 @@
package postprocess
import (
"context"
"go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/retrieve"
)
// SelectInsertProcessor is the post-processing step that promotes a record
// retrieved by the selection recall. It embeds the Processor interface.
type SelectInsertProcessor struct {
	Processor
}

// name returns the fixed identifier of this processor. process also writes it
// into the promoted record's model.OrderPostProcess entry.
func (p *SelectInsertProcessor) name() (name string) {
	return "SelectInsert"
}
// process moves the first record located after insertPosition (0) whose
// model.Retriever entry equals retrieve.SelectionRecall to the head of
// response.List, and tags it with this processor's name under
// model.OrderPostProcess.
//
// The records between insertPosition and the promoted record shift back by
// one, so the list keeps its length and every record. The previous append
// chain reused the backing array and silently overwrote the record at
// insertPosition, shortening the list by one. It always returns a nil error.
func (p *SelectInsertProcessor) process(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error) {
	insertPosition := 0
	if insertPosition > len(response.List) {
		return
	}
	targetIndex := -1
	for index, record := range response.List {
		if retriever, ok := record.Map[model.Retriever]; ok && index > insertPosition && retriever == retrieve.SelectionRecall {
			targetIndex = index
			break
		}
	}
	if targetIndex != -1 {
		record := response.List[targetIndex]
		// Mark the record as reordered by this processor.
		record.Map[model.OrderPostProcess] = p.name()
		// Shift [insertPosition, targetIndex) one slot back, then drop the
		// promoted record into the freed slot. copy handles overlapping ranges.
		copy(response.List[insertPosition+1:targetIndex+1], response.List[insertPosition:targetIndex])
		response.List[insertPosition] = record
	}
	return
}

View File

@@ -0,0 +1,47 @@
package postprocess
import (
"context"
"go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/util"
"sort"
"strconv"
)
// WeakInterventionProcessor is the post-processing step that slightly boosts
// the score of records in selected states and re-sorts the list. It embeds the
// Processor interface.
type WeakInterventionProcessor struct {
	Processor
}

// name returns the fixed identifier of this processor.
func (p *WeakInterventionProcessor) name() (name string) {
	return "WeakIntervention"
}
// process boosts record scores by state, re-sorts response.List by score in
// descending order, and records each record's new position under
// model.OrderWeakIntervention.
//
// Records in model.State4 get their score multiplied by 1.02, records in
// model.State5 by 1.05; every other record, including one without a
// model.State entry or with an unparsable one, keeps its score. A nil
// response or an empty list is left untouched. It always returns a nil error.
func (p *WeakInterventionProcessor) process(ctx context.Context, request *v1.RecsysRequest, response *v1.RecsysResponse, u *model.UserProfile) (err error) {
	if response == nil || len(response.List) == 0 {
		return nil
	}
	for _, item := range response.List {
		rawState, present := item.Map[model.State]
		if !present {
			continue
		}
		// An unparsable state yields 0; the error is ignored as before.
		stateID, _ := strconv.ParseInt(rawState, 10, 64)
		if stateID == model.State4 {
			item.Score = 1.02 * item.Score
		} else if stateID == model.State5 {
			item.Score = 1.05 * item.Score
		}
	}
	// Highest score first.
	sort.Sort(sort.Reverse(util.Records(response.List)))
	for position := range response.List {
		response.List[position].Map[model.OrderWeakIntervention] = strconv.Itoa(position)
	}
	return nil
}

View File

@@ -0,0 +1 @@
package service

View File

@@ -0,0 +1,181 @@
package service
import (
"bytes"
"context"
"fmt"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/dao"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/rank"
"go-common/app/service/bbq/recsys/service/util"
"math"
"sort"
"strconv"
"github.com/json-iterator/go"
)
// RankManager owns the configured rankers and keeps the feature logs returned
// by the most recent rank call.
type RankManager struct {
Rankers []Ranker
featureLogs []*rank.FeatureLog
}
// Ranker is one ranking strategy: rank scores the records in response and
// returns feature logs. Both methods are unexported, so only types in this
// package can implement it.
type Ranker interface {
name() (name string)
rank(c context.Context, req *recsys.RecsysRequest, response *recsys.RecsysResponse, profile *model.UserProfile, dao *dao.Dao) (featureLogs []*rank.FeatureLog)
}
// NewRankManager returns a RankManager whose only ranker is a BaseRanker
// loaded with the rule weights from initWeights, and whose feature-log slice
// is empty.
func NewRankManager() (m *RankManager) {
	return &RankManager{
		Rankers:     []Ranker{&BaseRanker{weights: initWeights()}},
		featureLogs: make([]*rank.FeatureLog, 0),
	}
}
// initWeights returns the hand-tuned linear weights of the rule-based ranker,
// keyed by rank feature name. A feature that is absent from the map simply
// does not contribute to the score computed in BaseRanker.rank.
func initWeights() (weights map[string]float64) {
	return map[string]float64{
		// session feature
		rank.SessionLikeTag:    0.05,
		rank.LikeTagCount:      0.05,
		rank.SessionPosPlayTag: 0.01,
		rank.SessionNegPlayTag: -0.04,
		rank.PureNegPlayTag:    -0.03,
		rank.LikeI2ITimeDiff:   0.1,
		rank.FollowTimeDiff:    0.05, //follow
		rank.SessionBBQFollow:  0.01, //follow

		// recall feature
		rank.FollowRecall:      0.01, //follow
		rank.LikeI2IRecall:     0.01,
		rank.LikeTagRecall:     0.01,
		rank.PosI2IRecall:      0.01,
		rank.PosTagRecall:      0.0,
		rank.UserProfileBili:   0.0,
		rank.UserProfileBBQ:    0.0,
		rank.SelectionRecall:   0.0,
		rank.BiliFollowsRecall: 0.5,
		rank.HotRecall:         0.0,
		rank.RandomRecall:      -0.01,

		// user-item feature
		// NOTE(review): this weight used to be assigned twice in a row; the
		// second assignment may have been meant for MatchBiliTagCountScore —
		// confirm before adding it.
		rank.MatchBBQTagCountScore: 0.02,
		rank.MatchBBQTagCount:      0.0,
		rank.MatchBiliTagCount:     0.0,
		rank.MatchBiliTagLevel3:    0.015,
		rank.MatchBiliTagLevel2:    0.01,
		rank.BiliPrefUp:            0.005,
		rank.MatchBBQTagLevel3:     0.015,
		rank.MatchBBQTagLevel2:     0.01,
		rank.BBQPrefUp:             0.005,
		rank.BBQFollow:             0.005, //follow

		// item feature:
		rank.OperationLevel: 0.0,

		// item feature: bili
		rank.BiliPlayNum:    0.05,
		rank.BiliFavRatio:   0.2,
		rank.BiliLikeRatio:  0.2,
		rank.BiliShareRatio: 0.2,
		rank.BiliCoinRatio:  0.2,
		rank.BiliReplyRatio: 0.2,

		// item feature: bbq
		rank.BBQPlayNum:    0.05,
		rank.BBQFavRatio:   0.0,
		rank.BBQLikeRatio:  0.2,
		rank.BBQShareRatio: 0.2,
		rank.BBQCoinRatio:  0.0,
		rank.BBQReplyRatio: 0.2,
	}
}
// rank runs the first configured ranker over response and stores the feature
// logs it returns on the manager.
func (m *RankManager) rank(c context.Context, req *recsys.RecsysRequest, response *recsys.RecsysResponse, profile *model.UserProfile, dao *dao.Dao) {
	m.featureLogs = m.Rankers[0].rank(c, req, response, profile, dao)
}
// BaseRanker is the rule-based ranker: a record's score is the weighted sum of
// its feature values. It embeds the Ranker interface.
type BaseRanker struct {
	Ranker
	// weights maps a rank feature name to its linear weight.
	weights map[string]float64
}

// name returns the fixed identifier of this ranker.
func (r *BaseRanker) name() (name string) {
	return "base"
}
// rank scores every record in response.List as the weighted sum of its
// feature values (rank.BuildFeature values times r.weights), sorts the list by
// score in descending order, and returns one feature log per record.
//
// Side effects on response:
//   - response.Message[model.RankModelName] is set to "rule001";
//   - each record gets its Score, plus model.OrderRanker (position after the
//     sort) and model.RankModelScore entries in its Map;
//   - when req.DebugFlag is set, each record also gets a human-readable score
//     breakdown (model.ScoreMessage) and its JSON-encoded feature log
//     (model.FeatureString).
//
// The returned logs are in the pre-sort order of the records. Previously the
// slice was allocated but never filled, so callers always got an empty result.
func (r *BaseRanker) rank(c context.Context, req *recsys.RecsysRequest, response *recsys.RecsysResponse, userProfile *model.UserProfile, dao *dao.Dao) (featureLogs []*rank.FeatureLog) {
	response.Message[model.RankModelName] = "rule001"
	//dao.InitModel(c, r.weights)
	featureLogs = make([]*rank.FeatureLog, 0, len(response.List))
	for _, record := range response.List {
		featureLog, featureValueMap := rank.BuildFeature(record, userProfile)
		score := 0.0
		scoreMap := make(map[string]string)
		for feature, weight := range r.weights {
			// A feature missing from the value map reads as 0 and adds nothing.
			featureValue := featureValueMap[feature]
			score = score + featureValue*weight
			if req.DebugFlag {
				// Only keep contributions that are visible at 4 decimals.
				if math.Abs(featureValue*weight) > 0.0001 {
					scoreMap[feature] = fmt.Sprintf("%.6f=%.6f*%.6f", featureValue*weight, featureValue, weight)
				}
			}
		}
		record.Score = score
		featureLog.Score = score
		featureLogs = append(featureLogs, featureLog)
		//debug log
		if req.DebugFlag {
			var buffer bytes.Buffer
			buffer.WriteString(model.ScoreTotalScore)
			buffer.WriteString(":")
			buffer.WriteString(strconv.FormatFloat(score, 'f', 6, 64))
			buffer.WriteString("=")
			scoreDetailList := util.SortStrMapByValue(scoreMap)
			for _, pair := range scoreDetailList {
				buffer.WriteString(pair.Key)
				buffer.WriteString(":")
				buffer.WriteString(pair.Value)
				buffer.WriteString(",")
			}
			record.Map[model.ScoreMessage] = buffer.String()
			// Best effort: on a marshal error the debug field is left empty.
			featureLogStr, _ := jsoniter.MarshalToString(featureLog)
			record.Map[model.FeatureString] = featureLogStr
		}
	}
	// Highest score first.
	sort.Sort(sort.Reverse(util.Records(response.List)))
	for index, record := range response.List {
		record.Map[model.OrderRanker] = strconv.Itoa(index)
		record.Map[model.RankModelScore] = strconv.FormatFloat(record.Score, 'f', -1, 64)
	}
	return
}

View File

@@ -0,0 +1,45 @@
# Bazel build file for the recsys rank Go package.
# Every target defaults to public visibility unless it overrides it below.
package(default_visibility = ["//visibility:public"])
# go_library comes from rules_go.
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# The Go library built from the ranking sources of this package.
go_library(
name = "go_default_library",
srcs = [
"feature.go",
"instance.go",
"model.go",
"rank.go",
],
importpath = "go-common/app/service/bbq/recsys/service/rank",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/model:go_default_library",
"//app/service/bbq/recsys/service/rank/treelite:go_default_library",
"//app/service/bbq/recsys/service/retrieve:go_default_library",
"//app/service/bbq/recsys/service/util:go_default_library",
"//library/log:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
],
)
# Every file under this package directory; private to this package.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate: this package's sources plus those of the treelite sub-package.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/bbq/recsys/service/rank/treelite:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,466 @@
package rank
import (
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/retrieve"
"go-common/app/service/bbq/recsys/service/util"
"go-common/library/log"
"math"
"strconv"
"strings"
"time"
)
// Rank feature names. Each constant's value equals its identifier. BuildFeature
// uses these names as keys of the feature-value map it returns.
const (
MatchBBQTagLevel3 = "MatchBBQTagLevel3"
MatchBBQTagLevel2 = "MatchBBQTagLevel2"
MatchBBQTagCount = "MatchBBQTagCount"
MatchBBQTagCountScore = "MatchBBQTagCountScore"
BBQPrefUp = "BBQPrefUp"
BBQFollow = "BBQFollow"
BBQBlack = "BBQBlack"
MatchBiliTagLevel3 = "MatchBiliTagLevel3"
MatchBiliTagLevel2 = "MatchBiliTagLevel2"
MatchBiliTagCount = "MatchBiliTagCount"
MatchBiliTagCountScore = "MatchBiliTagCountScore"
BiliPrefUp = "BiliPrefUp"
SessionBBQFollow = "SessionBBQFollow"
SessionLikeTag = "SessionLikeTag"
SessionPosPlayTag = "SessionPosPlayTag"
SessionNegPlayTag = "SessionNegPlayTag"
PureNegPlayTag = "PureNegPlayTag"
OperationLevel = "OperationLevel"
BiliPlayNum = "BiliPlayNum"
BiliFavRatio = "BiliFavRatio"
BiliLikeRatio = "BiliLikeRatio"
BiliShareRatio = "BiliShareRatio"
BiliCoinRatio = "BiliCoinRatio"
BiliReplyRatio = "BiliReplyRatio"
BBQPlayTotal = "BBQPlayTotal"
BBQPlayNum = "BBQPlayNum"
BBQFavRatio = "BBQFavRatio"
BBQLikeRatio = "BBQLikeRatio"
BBQShareRatio = "BBQShareRatio"
BBQCoinRatio = "BBQCoinRatio"
BBQReplyRatio = "BBQReplyRatio"
// Recall features. BuildFeature sets a value of 1 under each recall-class string
// found in a record.
// NOTE(review): presumably these names match the recall-class strings emitted by
// the retrieve package — confirm.
HotRecall = "HotRecall"
SelectionRecall = "SelectionRecall"
BiliFollowsRecall = "BiliFollowsRecall"
UserProfileBili = "UserProfileBili"
UserProfileBBQ = "UserProfileBBQ"
LikeI2IRecall = "LikeI2IRecall"
PosI2IRecall = "PosI2IRecall"
LikeTagRecall = "LikeTagRecall"
PosTagRecall = "PosTagRecall"
FollowRecall = "FollowRecall"
RandomRecall = "RandomRecall"
LikeUpTimeDiff = "LikeUpTimeDiff"
FollowTimeDiff = "FollowTimeDiff"
LikeI2ITimeDiff = "LikeI2ITimeDiff"
LikeTagCount = "LikeTagCount"
)
// FeatureLog is the per-record feature snapshot produced by BuildFeature.
// Exported fields carry JSON tags with omitempty, so zero values are left out
// of the encoded form. The unexported record field is not serialized by
// encoding/json-compatible encoders.
// NOTE(review): several fields (for example Gender, Age, PlayBBQFinish and the
// SessionLikeTag1..3 family) are not assigned by BuildFeature — confirm whether
// another code path fills them.
type FeatureLog struct {
// Record
record *recsys.RecsysRecord
// score
Score float64 `json:"score,omitempty"`
// user feature
MID int64 `json:"mid,omitempty"`
BUVID string `json:"buvid,omitempty"`
Gender int8 `json:"gender,omitempty"`
Age int8 `json:"age,omitempty"`
ViewVideoNum int `json:"ViewVideoNum,omitempty"`
// item feature
//item feature: attribute
SVID int64 `json:"svid,omitempty"`
AVID int64 `json:"avid,omitempty"`
CID int64 `json:"cid,omitempty"`
PubTime int64 `json:"pubtime,omitempty"`
PubTimeToNow int64 `json:"pubtimetonow,omitempty"`
TagID1 int64 `json:"tagid1,omitempty"`
TagID2 int64 `json:"tagid2,omitempty"`
ZoneID int64 `json:"ZoneID,omitempty"`
Duration int64 `json:"duration,omitempty"`
Width int64 `json:"width,omitempty"`
Height int64 `json:"height,omitempty"`
Rotate int64 `json:"rotate,omitempty"`
State int64 `json:"State,omitempty"`
//item feature: feedback
// bili
PlayB int64 `json:"playb,omitempty"`
FavB int64 `json:"favb,omitempty"`
LikeB int64 `json:"likeb,omitempty"`
ShareB int64 `json:"shareb,omitempty"`
ReplyB int64 `json:"replyb,omitempty"`
CoinB int64 `json:"coinb,omitempty"`
// bbq
PlayBBQTotal int64 `json:"PlayBBQTotal,omitempty"`
PlayBBQ int64 `json:"PlayBBQ,omitempty"`
PlayBBQFinish int64 `json:"PlayBBQFinish,omitempty"`
LikeBBQ int64 `json:"LikeBBQ,omitempty"`
ShareBBQ int64 `json:"ShareBBQ,omitempty"`
ReplyBBQ int64 `json:"ReplyBBQ,omitempty"`
// retrieve feature
RecallClasses string `json:"RecallClasses,omitempty"`
RetrieveName string `json:"retrievename,omitempty"`
RetrieveNum int64 `json:"retrievenum,omitempty"`
OperationLevel int64 `json:"operationlevel,omitempty"`
// user-item feature
MatchBiliTagLevel1 int64 `json:"matchtaglevel1,omitempty"`
MatchBiliTagLevel2 int64 `json:"matchtaglevel2,omitempty"`
MatchBiliTagLevel3 int64 `json:"matchtaglevel3,omitempty"`
MatchTitle int64 `json:"matchtitle,omitempty"`
MatchBBQTagLevel2 int64 `json:"MatchBBQTagLevel2,omitempty"`
MatchBBQTagLevel3 int64 `json:"MatchBBQTagLevel3,omitempty"`
MatchBiliTagCount int `json:"MatchBiliTagCount,omitempty"`
MatchBBQTagCount int `json:"MatchBBQTagCount,omitempty"`
// user-item-up feature
BBQPrefUp int64 `json:"BBQPrefUp,omitempty"`
BiliPrefUp int64 `json:"BiliPrefUp,omitempty"`
BBQFollow int64 `json:"BBQFollow,omitempty"`
BBQBlack int64 `json:"BBQBlack,omitempty"`
LikeAuthor int64 `json:"likeauthor,omitempty"`
PlayAuthor int64 `json:"playauthor,omitempty"`
// user-item feature: bbq session feature
SessionBBQFollow int64 `json:"SessionBBQFollow,omitempty"`
SessionLikeI2I int64 `json:"SessionLikeI2I,omitempty"`
SessionLikeTag int64 `json:"SessionLikeTag,omitempty"`
SessionLikeTag1 int64 `json:"SessionLikeTag1,omitempty"`
SessionLikeTag2 int64 `json:"SessionLikeTag2,omitempty"`
SessionLikeTag3 int64 `json:"SessionLikeTag3,omitempty"`
SessionPosPlayTag int64 `json:"SessionPosPlayTag,omitempty"`
SessionPosPlayTag1 int64 `json:"SessionPosPlayTag1,omitempty"`
SessionPosPlayTag2 int64 `json:"SessionPosPlayTag2,omitempty"`
SessionPosPlayTag3 int64 `json:"SessionPosPlayTag3,omitempty"`
SessionNegPlayTag int64 `json:"SessionNegPlayTag,omitempty"`
PureNegPlayTag int64 `json:"PureNegPlayTag,omitempty"`
SessionNegPlayTag1 int64 `json:"SessionNegPlayTag1,omitempty"`
SessionNegPlayTag2 int64 `json:"SessionNegPlayTag2,omitempty"`
SessionNegPlayTag3 int64 `json:"SessionNegPlayTag3,omitempty"`
MatchLast1VideoTag1 int64 `json:"matchlast1videotag1,omitempty"`
MatchLast1VideoTag2 int64 `json:"matchlast1videotag2,omitempty"`
MatchLast1VideoTag3 int64 `json:"matchlast1videotag3,omitempty"`
// Last1VideoTag3 has no struct tag, unlike the other exported fields.
Last1VideoTag3 int64
// context feature
HourOfDay int64 `json:"hourofday,omitempty"`
DayOfWeek int64 `json:"dayofweek,omitempty"`
}
// BuildFeature extracts the ranking features of one record for one user.
//
// It returns featureLog, a structured snapshot of the raw feature values, and
// featureValueMap, the numeric feature values keyed by rank feature name (or,
// for recall classes, by the recall-class string itself).
//
// Side effect: for i2i and up-id recall tags whose source is found in the user
// profile, record.Map[model.SourceTimeToNow] is overwritten with the age of
// that source action (now minus the stored timestamp, floored at 0).
//
// Numeric fields of record.Map are parsed leniently: parse errors are ignored
// and the value falls back to 0, except for model.PlayHive, whose error is
// checked before the play-count feature is set.
func BuildFeature(record *recsys.RecsysRecord, userProfile *model.UserProfile) (featureLog *FeatureLog, featureValueMap map[string]float64) {
now := time.Now().Unix()
featureValueMap = make(map[string]float64)
featureLog = &FeatureLog{}
featureLog.record = record
featureLog.MID = userProfile.Mid
featureLog.BUVID = userProfile.Buvid
featureLog.SVID = record.Svid
featureLog.AVID, _ = strconv.ParseInt(record.Map[model.AVID], 10, 64)
featureLog.CID, _ = strconv.ParseInt(record.Map[model.CID], 10, 64)
featureLog.Duration, _ = strconv.ParseInt(record.Map[model.Duration], 10, 64)
featureLog.ZoneID, _ = strconv.ParseInt(record.Map[model.ZoneID], 10, 64)
// recall feature
featureLog.RecallClasses = record.Map[model.RecallClasses]
// Each "|"-separated recall class becomes a feature with value 1. An empty
// RecallClasses string splits into one empty class, so the key "" is set too.
recallClasses := strings.Split(record.Map[model.RecallClasses], "|")
for _, recallClass := range recallClasses {
featureValueMap[recallClass] = 1
}
// Recall tags are ":"-separated; the third field is read as the numeric id of
// the source tag / video / uploader that triggered the recall.
recallTags := strings.Split(record.Map[model.RecallTags], "|")
for _, recallTag := range recallTags {
if strings.HasPrefix(recallTag, retrieve.RecallKeyTagIDPrefix) {
fields := strings.Split(recallTag, ":")
if len(fields) >= 3 {
tagStr := strings.Split(recallTag, ":")[2]
sourceTagID, _ := strconv.ParseInt(tagStr, 10, 64)
// NOTE(review): here the LikeTagIDs value is scored as a count, while the tag
// loop further down treats the same map's values as timestamps — confirm
// which meaning is correct.
if count, ok := userProfile.LikeTagIDs[sourceTagID]; ok {
featureValueMap[LikeTagCount] = util.ScoreCount(float64(count))
}
} else {
log.Error("feature error like tag recall")
}
}
if strings.HasPrefix(recallTag, retrieve.RecallKeyI2IPrefix) {
fields := strings.Split(recallTag, ":")
if len(fields) >= 3 {
I2IStr := strings.Split(recallTag, ":")[2]
sourceID, _ := strconv.ParseInt(I2IStr, 10, 64)
if sourceTimestamp, ok := userProfile.LikeVideos[sourceID]; ok {
// Age of the source like; both the age and its score are floored at 0.
timeDiff := math.Max(float64(now-sourceTimestamp), 0)
timeDiffScore := math.Max(util.ScoreTimeDiff(timeDiff), 0)
featureValueMap[LikeI2ITimeDiff] = timeDiffScore
record.Map[model.SourceTimeToNow] = strconv.Itoa(int(timeDiff))
}
} else {
log.Error("feature error like i2i recall")
}
}
if strings.HasPrefix(recallTag, retrieve.RecallKeyUpIDPrefix) {
fields := strings.Split(recallTag, ":")
if len(fields) >= 3 {
str := strings.Split(recallTag, ":")[2]
sourceUpID, _ := strconv.ParseInt(str, 10, 64)
if sourceTimestamp, ok := userProfile.BBQFollowAction[sourceUpID]; ok {
timeDiff := math.Max(float64(now-sourceTimestamp), 0)
timeDiffScore := math.Max(util.ScoreTimeDiff(timeDiff), 0)
featureValueMap[FollowTimeDiff] = timeDiffScore
record.Map[model.SourceTimeToNow] = strconv.Itoa(int(timeDiff))
}
// When the uploader is found in both maps, this second write to
// SourceTimeToNow replaces the one made for the follow action above.
if sourceTimestamp, ok := userProfile.LikeUPs[sourceUpID]; ok {
timeDiff := math.Max(float64(now-sourceTimestamp), 0)
timeDiffScore := math.Max(util.ScoreTimeDiff(timeDiff), 0)
featureValueMap[LikeUpTimeDiff] = timeDiffScore
record.Map[model.SourceTimeToNow] = strconv.Itoa(int(timeDiff))
}
} else {
log.Error("feature error follow recall")
}
}
}
// user feature
featureLog.ViewVideoNum = len(userProfile.ViewVideos)
// user tag && item tag
matchBiliTagCount := 0
matchTagCount := 0
itemTagIDs := strings.Split(record.Map[model.TagsID], "|")
// strings.Split never returns an empty slice for a non-empty separator, so
// tagCount is at least 1 even when the record has no tags.
tagCount := len(itemTagIDs)
for _, tagIDStr := range itemTagIDs {
//bili user userProfile tag
if tagScore, ok := userProfile.BiliTags[tagIDStr]; ok {
if tagScore > 0 {
featureLog.MatchBiliTagLevel3 = 1
featureValueMap[MatchBiliTagLevel3] = 1
matchBiliTagCount++
}
}
if tagScore, ok := userProfile.Zones2[tagIDStr]; ok {
if tagScore > 0 {
featureLog.MatchBiliTagLevel2 = 1
featureValueMap[MatchBiliTagLevel2] = 1
matchBiliTagCount++
}
}
//bbq user userProfile tag
if tagScore, ok := userProfile.BBQZones[tagIDStr]; ok {
if tagScore > 0 {
featureLog.MatchBBQTagLevel2 = 1
featureValueMap[MatchBBQTagLevel2] = 1
matchTagCount++
}
}
if tagScore, ok := userProfile.BBQTags[tagIDStr]; ok {
if tagScore > 0 {
featureLog.MatchBBQTagLevel3 = 1
featureValueMap[MatchBBQTagLevel3] = 1
matchTagCount++
}
}
// bbq user session tag FIXME
tagID, _ := strconv.ParseInt(tagIDStr, 10, 64)
// NOTE(review): the LikeTagIDs value is used as a timestamp here but as a count
// in the recall-tag loop above — confirm which meaning is correct.
if timestamp, ok := userProfile.LikeTagIDs[tagID]; ok {
featureLog.SessionLikeTag = 1
timeDiff := math.Max(float64(now-timestamp), 0)
timeDiffScore := math.Max(util.ScoreTimeDiff(timeDiff), 0)
featureValueMap[SessionLikeTag] = timeDiffScore
}
// For the session features below, a later matching tag overwrites the value
// written for an earlier one.
if count, ok := userProfile.PosTagIDs[tagID]; ok {
featureLog.SessionPosPlayTag = count
featureValueMap[SessionPosPlayTag] = util.ScoreCount(float64(count))
}
if count, ok := userProfile.NegTagIDs[tagID]; ok {
featureLog.SessionNegPlayTag = count
featureValueMap[SessionNegPlayTag] = util.ScoreCount(float64(count))
// pure negative tag
if _, ok := userProfile.PosTagIDs[tagID]; !ok {
featureLog.PureNegPlayTag = count
featureValueMap[PureNegPlayTag] = util.ScoreCount(float64(count))
}
}
}
featureLog.MatchBBQTagCount = matchTagCount
featureValueMap[MatchBBQTagCount] = float64(matchTagCount)
// Smoothed share of matching tags: (matches + 1) / (tags + 1).
if matchTagCount > 0 {
featureValueMap[MatchBBQTagCountScore] = (float64(matchTagCount) + 1.0) / (float64(tagCount) + 1.0)
}
featureLog.MatchBiliTagCount = matchBiliTagCount
featureValueMap[MatchBiliTagCount] = float64(matchBiliTagCount)
if matchBiliTagCount > 0 {
featureValueMap[MatchBiliTagCountScore] = (float64(matchBiliTagCount) + 1.0) / (float64(tagCount) + 1.0)
}
// user-up feature
upMidStr := record.Map[model.UperMid]
upMid, _ := strconv.ParseInt(upMidStr, 10, 64)
if _, ok := userProfile.FollowUps[upMid]; ok {
featureLog.BiliPrefUp = 1
featureValueMap[BiliPrefUp] = 1
}
if _, ok := userProfile.BBQPrefUps[upMid]; ok {
featureLog.BBQPrefUp = 1
featureValueMap[BBQPrefUp] = 1
}
// recently followed
if _, ok := userProfile.BBQFollowAction[upMid]; ok {
featureLog.SessionBBQFollow = 1
featureValueMap[SessionBBQFollow] = 1
}
// all followed uploaders
if _, ok := userProfile.BBQFollow[upMid]; ok {
featureLog.BBQFollow = 1
featureValueMap[BBQFollow] = 1
}
// blacklisted
if _, ok := userProfile.BBQBlack[upMid]; ok {
featureLog.BBQBlack = 1
featureValueMap[BBQBlack] = 1
}
//Up feature TODO
// item feature
stateStr := record.Map[model.State]
state, _ := strconv.ParseInt(stateStr, 10, 64)
if state == model.State5 {
featureLog.OperationLevel = 1
featureValueMap[OperationLevel] = 1
}
featureLog.State = state
pubTime, _ := strconv.ParseInt(record.Map[model.PubTime], 10, 64)
featureLog.PubTime = pubTime
featureLog.PubTimeToNow = now - pubTime
// Bili-side feedback features; computed only when a play count is present.
if play, ok := record.Map[model.PlayHive]; ok {
playNum, err := strconv.ParseFloat(play, 64)
if err == nil {
// log10(min(plays+1, 1e6)) / log10(1e6): a play-count score capped at 1.
playNumScore := math.Log10(math.Min(playNum+1.0, 1000000.0)) / math.Log10(1000000.0)
featureLog.PlayB = int64(playNum)
featureValueMap[BiliPlayNum] = playNumScore
}
// The ratio features below are computed even when the play count failed to
// parse. Each ratio is
// (min(count, plays) + 1) / (plays + smoothing), capped at 0.1.
if fav, ok := record.Map[model.FavHive]; ok {
favNum, _ := strconv.ParseFloat(fav, 64)
favScore := (math.Min(favNum, playNum) + 1.0) / (playNum + 200.0)
favScore = math.Min(favScore, 0.1)
featureLog.FavB = int64(favNum)
featureValueMap[BiliFavRatio] = favScore
}
if likes, ok := record.Map[model.LikesHive]; ok {
likesNum, _ := strconv.ParseFloat(likes, 64)
likesScore := (math.Min(likesNum, playNum) + 1.0) / (playNum + 100.0)
likesScore = math.Min(likesScore, 0.1)
featureLog.LikeB = int64(likesNum)
featureValueMap[BiliLikeRatio] = likesScore
}
if share, ok := record.Map[model.ShareHive]; ok {
shareNum, _ := strconv.ParseFloat(share, 64)
shareScore := (math.Min(shareNum, playNum) + 1.0) / (playNum + 500.0)
shareScore = math.Min(shareScore, 0.1)
featureLog.ShareB = int64(shareNum)
featureValueMap[BiliShareRatio] = shareScore
}
if coin, ok := record.Map[model.CoinHive]; ok {
coinNum, _ := strconv.ParseFloat(coin, 64)
coinScore := (math.Min(coinNum, playNum) + 1.0) / (playNum + 200.0)
coinScore = math.Min(coinScore, 0.1)
featureLog.CoinB = int64(coinNum)
featureValueMap[BiliCoinRatio] = coinScore
}
if reply, ok := record.Map[model.ReplyHive]; ok {
replyNum, _ := strconv.ParseFloat(reply, 64)
replyScore := (math.Min(replyNum, playNum) + 1.0) / (playNum + 500.0)
replyScore = math.Min(replyScore, 0.1)
featureLog.ReplyB = int64(replyNum)
featureValueMap[BiliReplyRatio] = replyScore
}
}
// bbq video feature
playMonthTotal, _ := strconv.ParseFloat(record.Map[model.PlayMonthTotal], 64)
featureLog.PlayBBQTotal = int64(playMonthTotal)
featureValueMap[BBQPlayTotal] = playMonthTotal
// BBQ-side feedback features; same formulas as the bili block above.
if play, ok := record.Map[model.PlayMonth]; ok {
playNum, _ := strconv.ParseFloat(play, 64)
playNumScore := math.Log10(math.Min(playNum+1.0, 1000000.0)) / math.Log10(1000000.0)
featureLog.PlayBBQ = int64(playNum)
featureValueMap[BBQPlayNum] = playNumScore
// Repeats the PlayBBQ assignment made two lines above; it has no extra effect.
featureLog.PlayBBQ = int64(playNum)
if likes, ok := record.Map[model.LikesMonth]; ok {
likesNum, _ := strconv.ParseFloat(likes, 64)
likesScore := (math.Min(likesNum, playNum) + 1.0) / (playNum + 100.0)
likesScore = math.Min(likesScore, 0.1)
featureLog.LikeBBQ = int64(likesNum)
featureValueMap[BBQLikeRatio] = likesScore
}
if share, ok := record.Map[model.ShareMonth]; ok {
shareNum, _ := strconv.ParseFloat(share, 64)
shareScore := (math.Min(shareNum, playNum) + 1.0) / (playNum + 500.0)
shareScore = math.Min(shareScore, 0.1)
featureLog.ShareBBQ = int64(shareNum)
featureValueMap[BBQShareRatio] = shareScore
}
if reply, ok := record.Map[model.ReplyMonth]; ok {
replyNum, _ := strconv.ParseFloat(reply, 64)
replyScore := (math.Min(replyNum, playNum) + 1.0) / (playNum + 500.0)
replyScore = math.Min(replyScore, 0.1)
featureLog.ReplyBBQ = int64(replyNum)
featureValueMap[BBQReplyRatio] = replyScore
}
}
return
}

View File

@@ -0,0 +1,395 @@
package rank
import (
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"fmt"
"go-common/app/service/bbq/recsys/model"
"strings"
)
// buildFeatures derives one FeatureLog per record of response.List. The
// returned slice is aligned index-for-index with response.List.
//
// request is not consulted. The feature value map returned by BuildFeature is
// not consumed here; the placeholder write below is carried over from the
// original FIXME.
func (rankModel *RankModel) buildFeatures(request *recsys.RecsysRequest, response *recsys.RecsysResponse, userProfile *model.UserProfile) (featureLogs []*FeatureLog) {
	records := response.List
	featureLogs = make([]*FeatureLog, len(records))
	for pos := range records {
		entry, values := BuildFeature(records[pos], userProfile)
		//FIXME placeholder write; the value map is otherwise discarded.
		values["test"] = 1
		featureLogs[pos] = entry
	}
	return featureLogs
}
// buildInstancesV1 builds the model inputs for the feature layout that
// loadModels pairs with model version 0.0.4: a one-hot vector over the nine
// recall sources below, selected by the record's RetrieveName. Unknown
// retrieve names leave the vector all zero.
//
// The returned slice is aligned index-for-index with featureLogs. Each
// instance carries its feature log so that the ranker can write the score
// back into it; leaving it unset makes rank dereference a nil pointer.
func (rankModel *RankModel) buildInstancesV1(featureLogs []*FeatureLog) (instances []*Instance) {
	featureMap := map[string]int64{
		"HotRecall":        0,
		"LikeI2IRecall":    1,
		"LikeTagRecall":    2,
		"LikeUPRecall":     3,
		"NewPublishRecall": 4,
		"PosI2IRecall":     5,
		"PosTagRecall":     6,
		"SelectionRecall":  7,
		"UserProfileBili":  8,
	}
	featureSize := len(featureMap)
	instances = make([]*Instance, len(featureLogs))
	for i, featureLog := range featureLogs {
		featureValues := make([]float64, featureSize)
		//TODO recall feature: single recall -> multiple recall
		if index, ok := featureMap[featureLog.RetrieveName]; ok {
			featureValues[index] = 1
		}
		instances[i] = &Instance{
			record:        featureLog.record,
			featureValues: &featureValues,
			featureLog:    featureLog,
		}
	}
	return instances
}
// buildInstancesV2 builds the model inputs for the feature layout that
// loadModels pairs with model version 0.0.5.
//
// Layout (22 slots): a one-hot recall source chosen by RetrieveName, the raw
// hive and month counters, and two tag-match features. The recall slot is
// written first, so a counter slot always ends up holding the counter value.
// The returned slice is aligned index-for-index with featureLogs.
func (rankModel *RankModel) buildInstancesV2(featureLogs []*FeatureLog) (instances []*Instance) {
	slots := map[string]int64{
		"HotRecall":          0,
		"LikeI2IRecall":      1,
		"LikeTagRecall":      2,
		"LikeUPRecall":       3,
		"NewPublishRecall":   4,
		"PosTagRecall":       5,
		"SelectionRecall":    6,
		"UserProfileBBQ":     7,
		"UserProfileBili":    8,
		"play_hive":          9,
		"likes_hive":         10,
		"fav_hive":           11,
		"reply_hive":         12,
		"share_hive":         13,
		"coin_hive":          14,
		"play_month_finish":  15,
		"play_month":         16,
		"likes_month":        17,
		"reply_month":        18,
		"share_month":        19,
		"has_tag_count":      20,
		"contains_tag_count": 21,
	}
	width := len(slots)
	instances = make([]*Instance, len(featureLogs))
	for pos, entry := range featureLogs {
		//TODO
		vector := make([]float64, width)
		if slot, found := slots[entry.RetrieveName]; found {
			vector[slot] = 1
		}
		vector[slots["play_hive"]] = float64(entry.PlayB)
		vector[slots["likes_hive"]] = float64(entry.LikeB)
		vector[slots["fav_hive"]] = float64(entry.FavB)
		vector[slots["reply_hive"]] = float64(entry.ReplyB)
		vector[slots["share_hive"]] = float64(entry.ShareB)
		vector[slots["coin_hive"]] = float64(entry.CoinB)
		vector[slots["play_month_finish"]] = float64(entry.PlayBBQFinish)
		vector[slots["play_month"]] = float64(entry.PlayBBQ)
		vector[slots["likes_month"]] = float64(entry.LikeBBQ)
		vector[slots["reply_month"]] = float64(entry.ReplyBBQ)
		vector[slots["share_month"]] = float64(entry.ShareBBQ)
		// Binary flag: does the record match any tag at all?
		hasTag := 0.0
		if entry.MatchBBQTagCount > 0 {
			hasTag = 1.0
		}
		vector[slots["has_tag_count"]] = hasTag
		vector[slots["contains_tag_count"]] = float64(entry.MatchBBQTagCount)
		instances[pos] = &Instance{
			record:        entry.record,
			featureValues: &vector,
			featureLog:    entry,
		}
	}
	return instances
}
// buildInstancesV3 builds the model inputs for the feature layout that
// loadModels pairs with model version 0.0.11.
//
// Layout (23 slots): a multi-hot recall block filled from the "|"-separated
// RecallClasses, the raw hive and month counters, and two tag-match features.
// This layout has no likes_hive slot. Recall slots are written first, so a
// counter slot always ends up holding the counter value. The returned slice
// is aligned index-for-index with featureLogs.
func (rankModel *RankModel) buildInstancesV3(featureLogs []*FeatureLog) (instances []*Instance) {
	slots := map[string]int64{
		"HotRecall":          0,
		"RandomRecall":       1,
		"SelectionRecall":    2,
		"UserProfileBili":    3,
		"UserProfileBBQ":     4,
		"LikeI2IRecall":      5,
		"LikeTagRecall":      6,
		"LikeUPRecall":       7,
		"PosI2IRecall":       8,
		"PosTagRecall":       9,
		"FollowRecall":       10,
		"play_hive":          11,
		"fav_hive":           12,
		"reply_hive":         13,
		"share_hive":         14,
		"coin_hive":          15,
		"play_month_finish":  16,
		"play_month":         17,
		"likes_month":        18,
		"reply_month":        19,
		"share_month":        20,
		"has_tag_count":      21,
		"contains_tag_count": 22,
	}
	width := len(slots)
	instances = make([]*Instance, len(featureLogs))
	for pos, entry := range featureLogs {
		//TODO
		vector := make([]float64, width)
		for _, class := range strings.Split(entry.RecallClasses, "|") {
			if slot, found := slots[class]; found {
				vector[slot] = 1
			}
		}
		vector[slots["play_hive"]] = float64(entry.PlayB)
		vector[slots["fav_hive"]] = float64(entry.FavB)
		vector[slots["reply_hive"]] = float64(entry.ReplyB)
		vector[slots["share_hive"]] = float64(entry.ShareB)
		vector[slots["coin_hive"]] = float64(entry.CoinB)
		vector[slots["play_month_finish"]] = float64(entry.PlayBBQFinish)
		vector[slots["play_month"]] = float64(entry.PlayBBQ)
		vector[slots["likes_month"]] = float64(entry.LikeBBQ)
		vector[slots["reply_month"]] = float64(entry.ReplyBBQ)
		vector[slots["share_month"]] = float64(entry.ShareBBQ)
		// Binary flag: does the record match any tag at all?
		hasTag := 0.0
		if entry.MatchBBQTagCount > 0 {
			hasTag = 1.0
		}
		vector[slots["has_tag_count"]] = hasTag
		vector[slots["contains_tag_count"]] = float64(entry.MatchBBQTagCount)
		instances[pos] = &Instance{
			record:        entry.record,
			featureValues: &vector,
			featureLog:    entry,
		}
	}
	return instances
}
// buildInstancesV12 builds the model inputs for the feature layout that
// loadModels pairs with model version 0.0.12.
//
// Layout (24 slots): a multi-hot recall block filled from the "|"-separated
// RecallClasses, tag and zone match features, then the raw hive and month
// counters. Recall slots are written first, so any later write to the same
// slot wins. The returned slice is aligned index-for-index with featureLogs.
func (rankModel *RankModel) buildInstancesV12(featureLogs []*FeatureLog) (instances []*Instance) {
	slots := map[string]int64{
		"HotRecall":          0,
		"RandomRecall":       1,
		"SelectionRecall":    2,
		"UserProfileBili":    3,
		"UserProfileBBQ":     4,
		"LikeI2IRecall":      5,
		"LikeTagRecall":      6,
		"LikeUPRecall":       7,
		"PosI2IRecall":       8,
		"PosTagRecall":       9,
		"FollowRecall":       10,
		"has_tag_count":      11,
		"contains_tag_count": 12,
		"has_zone_count":     13,
		"play_hive":          14,
		"fav_hive":           15,
		"reply_hive":         16,
		"share_hive":         17,
		"coin_hive":          18,
		"play_month_finish":  19,
		"play_month":         20,
		"likes_month":        21,
		"reply_month":        22,
		"share_month":        23,
	}
	width := len(slots)
	instances = make([]*Instance, len(featureLogs))
	for pos, entry := range featureLogs {
		vector := make([]float64, width)
		for _, class := range strings.Split(entry.RecallClasses, "|") {
			if slot, found := slots[class]; found {
				vector[slot] = 1
			}
		}
		vector[slots["play_hive"]] = float64(entry.PlayB)
		vector[slots["fav_hive"]] = float64(entry.FavB)
		vector[slots["reply_hive"]] = float64(entry.ReplyB)
		vector[slots["share_hive"]] = float64(entry.ShareB)
		vector[slots["coin_hive"]] = float64(entry.CoinB)
		vector[slots["play_month_finish"]] = float64(entry.PlayBBQFinish)
		vector[slots["play_month"]] = float64(entry.PlayBBQ)
		vector[slots["likes_month"]] = float64(entry.LikeBBQ)
		vector[slots["reply_month"]] = float64(entry.ReplyBBQ)
		vector[slots["share_month"]] = float64(entry.ShareBBQ)
		// Binary flags derived from the tag and level-2 tag match counters.
		hasTag := 0.0
		if entry.MatchBBQTagCount > 0 {
			hasTag = 1.0
		}
		vector[slots["has_tag_count"]] = hasTag
		hasZone := 0.0
		if entry.MatchBBQTagLevel2 > 0 {
			hasZone = 1.0
		}
		vector[slots["has_zone_count"]] = hasZone
		vector[slots["contains_tag_count"]] = float64(entry.MatchBBQTagCount)
		instances[pos] = &Instance{
			record:        entry.record,
			featureValues: &vector,
			featureLog:    entry,
		}
	}
	return instances
}
// buildInstancesV13 builds the model inputs for the feature layout that
// loadModels pairs with model version 0.0.13.
//
// Layout (65 slots, order as listed in the table): "recall-<class>" flags
// filled from the "|"-separated RecallClasses, the raw hive and month
// counters, two tag-match features, and one-hot "zone-bucket-<ZoneID>" and
// "state-bucket-<State>" flags. Zone or state values without a slot leave the
// vector untouched. The returned slice is aligned index-for-index with
// featureLogs.
func (rankModel *RankModel) buildInstancesV13(featureLogs []*FeatureLog) (instances []*Instance) {
	slots := map[string]int64{
		"zone-bucket-168":        0,
		"zone-bucket-75":         1,
		"play_hive":              2,
		"zone-bucket-95":         3,
		"likes_month":            4,
		"state-bucket-3":         5,
		"share_month":            6,
		"recall-PosTagRecall":    7,
		"zone-bucket-124":        8,
		"recall-PosI2IRecall":    9,
		"state-bucket-4":         10,
		"recall-SelectionRecall": 11,
		"zone-bucket-156":        12,
		"contains_tag_count":     13,
		"zone-bucket-158":        14,
		"zone-bucket-183":        15,
		"zone-bucket-184":        16,
		"zone-bucket-21":         17,
		"zone-bucket-154":        18,
		"zone-bucket-159":        19,
		"zone-bucket-85":         20,
		"recall-LikeUPRecall":    21,
		"reply_month":            22,
		"state-bucket-1":         23,
		"zone-bucket-96":         24,
		"has_tag_count":          25,
		"zone-bucket-86":         26,
		"zone-bucket-138":        27,
		"zone-bucket-182":        28,
		"play_month_finish":      29,
		"recall-HotRecall":       30,
		"zone-bucket-157":        31,
		"zone-bucket-20":         32,
		"zone-bucket-39":         33,
		"zone-bucket-161":        34,
		"reply_hive":             35,
		"recall-LikeTagRecall":   36,
		"zone-bucket-76":         37,
		"zone-bucket-98":         38,
		"state-bucket-5":         39,
		"zone-bucket-22":         40,
		"zone-bucket-27":         41,
		"zone-bucket-122":        42,
		"zone-bucket-176":        43,
		"recall-UserProfileBBQ":  44,
		"recall-UserProfileBili": 45,
		"zone-bucket-163":        46,
		"zone-bucket-30":         47,
		"zone-bucket-31":         48,
		"zone-bucket-59":         49,
		"recall-LikeI2IRecall":   50,
		"zone-bucket-25":         51,
		"zone-bucket-28":         52,
		"zone-bucket-24":         53,
		"zone-bucket-29":         54,
		"zone-bucket-164":        55,
		"coin_hive":              56,
		"play_month":             57,
		"share_hive":             58,
		"recall-RandomRecall":    59,
		"fav_hive":               60,
		"zone-bucket-162":        61,
		"likes_hive":             62,
		"recall-FollowRecall":    63,
		"zone-bucket-47":         64,
	}
	width := len(slots)
	instances = make([]*Instance, len(featureLogs))
	for pos, entry := range featureLogs {
		vector := make([]float64, width)
		for _, class := range strings.Split(entry.RecallClasses, "|") {
			if slot, found := slots["recall-"+class]; found {
				vector[slot] = 1
			}
		}
		vector[slots["play_hive"]] = float64(entry.PlayB)
		vector[slots["likes_hive"]] = float64(entry.LikeB)
		vector[slots["fav_hive"]] = float64(entry.FavB)
		vector[slots["reply_hive"]] = float64(entry.ReplyB)
		vector[slots["share_hive"]] = float64(entry.ShareB)
		vector[slots["coin_hive"]] = float64(entry.CoinB)
		vector[slots["play_month_finish"]] = float64(entry.PlayBBQFinish)
		vector[slots["play_month"]] = float64(entry.PlayBBQ)
		vector[slots["likes_month"]] = float64(entry.LikeBBQ)
		vector[slots["reply_month"]] = float64(entry.ReplyBBQ)
		vector[slots["share_month"]] = float64(entry.ShareBBQ)
		// Binary flag: does the record match any tag at all?
		hasTag := 0.0
		if entry.MatchBBQTagCount > 0 {
			hasTag = 1.0
		}
		vector[slots["has_tag_count"]] = hasTag
		vector[slots["contains_tag_count"]] = float64(entry.MatchBBQTagCount)
		//bucket features
		if slot, found := slots[fmt.Sprintf("zone-bucket-%d", entry.ZoneID)]; found {
			vector[slot] = 1
		}
		if slot, found := slots[fmt.Sprintf("state-bucket-%d", entry.State)]; found {
			vector[slot] = 1
		}
		instances[pos] = &Instance{
			record:        entry.record,
			featureValues: &vector,
			featureLog:    entry,
		}
	}
	return instances
}

View File

@@ -0,0 +1,111 @@
package rank
import (
"fmt"
"io/ioutil"
"os"
"strings"
xgb "go-common/app/service/bbq/recsys/service/rank/treelite"
"go-common/library/log"
"github.com/gogo/protobuf/proto"
)
// loadModels reads every model directory listed (comma separated) in the
// environment variable named by EnvPredictModelDirs and wraps each model that
// loads successfully in a RankModel wired to the feature builder of its
// version.
//
// The version is recognised by substring match on "<dir>/model.proto";
// versions are probed in the order of knownVersions and the first match wins.
// Entries that are blank, fail to load, or carry no known version are logged
// and skipped.
//
// The returned err reflects the last entry that was processed, so it can be
// nil even though earlier entries failed, and non-nil even though models is
// not empty. Callers should look at both results.
func (m *RankModelManager) loadModels() (models []*RankModel, err error) {
	modelFileNamesStr := os.Getenv(EnvPredictModelDirs)
	log.Info("modelFileNamesStr: %s", modelFileNamesStr)
	if modelFileNamesStr == "" {
		return nil, fmt.Errorf("env variable %s is empty", EnvPredictModelDirs)
	}
	// Probe order matters only in that the first matching version is used.
	knownVersions := []string{"0.0.13", "0.0.12", "0.0.11", "0.0.5", "0.0.4"}
	models = make([]*RankModel, 0)
	for _, modelFileDir := range strings.Split(modelFileNamesStr, ",") {
		// The check has to run on the directory: the formatted file name
		// below always ends in "/model.proto" and is never empty.
		modelFileDir = strings.TrimSpace(modelFileDir)
		if modelFileDir == "" {
			err = fmt.Errorf("model name is empty: %s", modelFileNamesStr)
			log.Error("read model error: (%v)", err)
			continue
		}
		modelFileName := fmt.Sprintf("%s/model.proto", modelFileDir)
		var model *xgb.Model
		model, err = m.readModel(modelFileName)
		if model == nil || err != nil {
			log.Error("read model error: (%v)", err)
			continue
		}
		log.Info("xgb NumOutputGroup:%d, NFeatures:%d, NEstimators:%d\n", model.GetNumOutputGroup(), model.GetNumFeature(), len(model.Trees))
		// model & feature conf
		version := ""
		for _, candidate := range knownVersions {
			if strings.Contains(modelFileName, candidate) {
				version = candidate
				break
			}
		}
		if version == "" {
			log.Error("no feature builder for model file %s, model is skipped", modelFileName)
			continue
		}
		rankModel := &RankModel{
			name:  fmt.Sprintf(ModelNameTemplate, version),
			model: model,
			score: model.PredictSingle,
		}
		switch version {
		case "0.0.13":
			rankModel.buildInstances = rankModel.buildInstancesV13
		case "0.0.12":
			rankModel.buildInstances = rankModel.buildInstancesV12
		case "0.0.11":
			rankModel.buildInstances = rankModel.buildInstancesV3
		case "0.0.5":
			rankModel.buildInstances = rankModel.buildInstancesV2
		case "0.0.4":
			rankModel.buildInstances = rankModel.buildInstancesV1
		}
		models = append(models, rankModel)
	}
	return models, err
}
// readModel loads, decodes and validates the protobuf model stored at
// modelFileName, then runs one smoke-test prediction and logs the result.
//
// It returns (nil, err) when the file cannot be read, cannot be decoded, or
// fails ValidateModel.
func (m *RankModelManager) readModel(modelFileName string) (model *xgb.Model, err error) {
	data, err := ioutil.ReadFile(modelFileName)
	if err != nil {
		return nil, err
	}
	model = &xgb.Model{}
	if err = proto.Unmarshal(data, model); err != nil {
		return nil, err
	}
	valid, err := model.ValidateModel()
	if !valid || err != nil {
		log.Error("model is not valid %v", err)
		return nil, err
	}
	// Only report success once the model has been decoded and validated.
	log.Info("read model success: %s", modelFileName)
	// Smoke test: score a vector of the model's width with slot 2 set.
	// copy takes (dst, src); fvals is the destination, so the sample is
	// truncated when the model has fewer features than the sample.
	vals := []float64{0, 0, 1, 0, 0, 0}
	fvals := make([]float64, model.GetNumFeature())
	copy(fvals, vals)
	p := model.PredictSingle(fvals)
	log.Info("xgb Test Prediction for %v: %f\n", fvals, p)
	return model, err
}

View File

@@ -0,0 +1,102 @@
package rank
import (
"errors"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
xgb "go-common/app/service/bbq/recsys/service/rank/treelite"
"go-common/app/service/bbq/recsys/service/util"
"go-common/library/log"
"sort"
"strconv"
)
// Rank model constants.
const (
	// DefaultModelKey is the RankModels key that DoRank looks up.
	DefaultModelKey = "xgb_model_v0.0.13"
	// ModelNameTemplate is the fmt template that turns a version string into
	// a model name (and therefore a RankModels key).
	ModelNameTemplate = "xgb_model_v%s"
	// EnvPredictModelDirs is the name of the environment variable that lists
	// the model directories, comma separated.
	// NOTE(review): the value spells "MODLE"; it is a runtime name, so
	// presumably deployments depend on that exact spelling - confirm before
	// changing it.
	EnvPredictModelDirs = "ENV_PREDICT_MODLE_DIR_LIST"
)
// RankModelManager holds the loaded rank models, keyed by model name.
type RankModelManager struct {
	// RankModels maps a model name (see ModelNameTemplate) to its model.
	RankModels map[string]*RankModel
}
// NewRankModelManager loads all configured models and indexes them by name.
//
// A load error is always logged. When no model could be loaded because of an
// error, the manager is returned with RankModels left nil; lookups on it
// simply miss, so DoRank reports a missing model. When some models loaded
// despite an error, the ones that loaded are registered.
func NewRankModelManager() (m *RankModelManager) {
	m = &RankModelManager{}
	models, err := m.loadModels()
	if err != nil {
		log.Error("load model error %v", err)
		if len(models) == 0 {
			return
		}
	}
	m.RankModels = make(map[string]*RankModel, len(models))
	for _, rankModel := range models {
		m.RankModels[rankModel.name] = rankModel
	}
	return
}
// RankModel couples a loaded xgb model with the feature builder that matches
// its version.
type RankModel struct {
	// model is the decoded tree ensemble; rank scores instances through it.
	model *xgb.Model
	// name is the key under which the manager registers this model.
	name string
	// score is set by loadModels to model.PredictSingle.
	score func([]float64) float64
	// buildInstances turns feature logs into model inputs, one per log.
	buildInstances func(featureLogs []*FeatureLog) (instances []*Instance)
}
// FeatureConf is an empty placeholder type; it has no fields and is not
// referenced in the code visible here.
type FeatureConf struct {
}
// Instance is one scoring unit: a record together with its model input.
type Instance struct {
	// record is the response record that receives the predicted score.
	record *recsys.RecsysRecord
	// featureLog is the feature log the instance was built from; rank also
	// writes the predicted score into it.
	featureLog *FeatureLog
	// featureValues points at the dense feature vector passed to the model.
	featureValues *[]float64
}
// DoRank scores and sorts response.List with the model registered under
// DefaultModelKey and records that model's name in response.Message.
//
// It returns an error when the default model is not registered; otherwise it
// returns whatever the model's rank step returns.
func (m *RankModelManager) DoRank(request *recsys.RecsysRequest, response *recsys.RecsysResponse, userProfile *model.UserProfile) (err error) {
	// 1.0 choose model
	chosen, found := m.RankModels[DefaultModelKey]
	if !found {
		err = errors.New("rank model is missing")
		return err
	}
	response.Message[model.RankModelName] = chosen.name
	err = chosen.rank(request, response, userProfile)
	return err
}
// rank scores every record of response.List with the model and reorders the
// list by descending score.
//
// Steps: build feature logs, turn them into instances with the version
// specific builder, predict a score per instance, sort, then stamp each
// record's Map with its rank position and score. The returned error is
// always nil.
func (rankModel *RankModel) rank(request *recsys.RecsysRequest, response *recsys.RecsysResponse, userProfile *model.UserProfile) (err error) {
	// 2.0 init/load model & feature conf
	// 3.0 build feature
	// 3.0 build instances (feature + conf + operator -> instance)
	// 4.0 score each record
	// 5.0 rank

	//build features
	features := rankModel.buildFeatures(request, response, userProfile)
	instances := rankModel.buildInstances(features)
	for _, instance := range instances {
		//rankModel.score(instance)
		score := rankModel.model.PredictSingle(*instance.featureValues)
		instance.record.Score = score
		// Not every builder attaches a feature log to its instances; the
		// score is mirrored into the log only when one is present.
		if instance.featureLog != nil {
			instance.featureLog.Score = score
		}
	}
	sort.Sort(sort.Reverse(util.Records(response.List)))
	for index, record := range response.List {
		record.Map[model.OrderRanker] = strconv.Itoa(index)
		record.Map[model.RankModelScore] = strconv.FormatFloat(record.Score, 'f', -1, 64)
	}
	return
}

View File

@@ -0,0 +1,49 @@
# Bazel targets for the treelite package: the Go library built from
# predict.go plus the Go code generated from tree.proto.
load(
    "@io_bazel_rules_go//proto:def.bzl",
    "go_proto_library",
)

package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
)

# Hand-written prediction code; embeds the generated proto types so both live
# in the same Go package.
go_library(
    name = "go_default_library",
    srcs = ["predict.go"],
    embed = [":treelite_go_proto"],
    importpath = "go-common/app/service/bbq/recsys/service/rank/treelite",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
    deps = ["@com_github_gogo_protobuf//proto:go_default_library"],
)

# Every file in this package.
filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

# Public wrapper around package-srcs.
filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

# Model schema.
proto_library(
    name = "treelite_proto",
    srcs = ["tree.proto"],
    tags = ["automanaged"],
)

# Go bindings generated from the schema above.
go_proto_library(
    name = "treelite_go_proto",
    compilers = ["@io_bazel_rules_go//proto:go_proto"],
    importpath = "go-common/app/service/bbq/recsys/service/rank/treelite",
    proto = ":treelite_proto",
    tags = ["automanaged"],
)

View File

@@ -0,0 +1,67 @@
package treelite
import (
"errors"
"math"
"strconv"
)
// Predict walks the tree rooted at node and returns the value of the leaf
// selected by fvals.
//
// At every internal node the feature at split_index is compared with the
// threshold: a strictly smaller value goes left, anything else goes right.
// The node's op field is not consulted.
//
// When split_index does not address an element of fvals the feature is
// treated as missing and the node's default_left direction is followed
// instead of indexing out of range. A nil node, including a child that is
// absent on an otherwise internal node, yields 0.
func (node *Node) Predict(fvals []float64) float64 {
	for node != nil {
		left, right := node.GetLeftChild(), node.GetRightChild()
		if left == nil && right == nil {
			return node.GetLeafValue()
		}
		goLeft := node.GetDefaultLeft()
		if idx := int(node.GetSplitIndex()); idx >= 0 && idx < len(fvals) {
			goLeft = fvals[idx] < node.GetThreshold()
		}
		if goLeft {
			node = left
		} else {
			node = right
		}
	}
	return 0
}
// ValidateModel reports whether the model can be scored by this package.
//
// A model is accepted when it has at least one feature, exactly one output
// group, is not a random forest, and every tree
//   - has a head node,
//   - uses only numerical splits,
//   - has both children on every internal node, and
//   - splits only on feature indices inside [0, num_feature).
//
// The head node is required because the generated getters are nil-safe: a
// missing head would otherwise read as a numerical node and pass. On
// rejection valid is false and err names the first violated rule.
func (model *Model) ValidateModel() (valid bool, err error) {
	if model.GetNumFeature() < 1 {
		err = errors.New("number of Feature < 1")
		return false, err
	}
	if model.GetNumOutputGroup() != 1 {
		err = errors.New("number of output group != 1")
		return false, err
	}
	if model.GetRandomForestFlag() {
		err = errors.New("do not support random forest model now")
		return false, err
	}
	numFeature := model.GetNumFeature()
	for _, tree := range model.Trees {
		head := tree.GetHead()
		if head == nil {
			return false, errors.New("tree has no head node")
		}
		if head.GetSplitType() != Node_NUMERICAL {
			err = errors.New("tree.GetHead().GetSplitType() != Node_NUMERICAL")
			return false, err
		}
		// Depth-first walk with an explicit stack so deep trees do not
		// recurse.
		pending := []*Node{head}
		for len(pending) > 0 {
			node := pending[len(pending)-1]
			pending = pending[:len(pending)-1]
			left, right := node.GetLeftChild(), node.GetRightChild()
			if left == nil && right == nil {
				continue // leaf
			}
			if left == nil || right == nil {
				return false, errors.New("internal node is missing a child")
			}
			if node.GetSplitType() != Node_NUMERICAL {
				return false, errors.New("only numerical splits are supported")
			}
			if idx := node.GetSplitIndex(); idx < 0 || idx >= numFeature {
				return false, errors.New("split index is outside the feature range")
			}
			pending = append(pending, left, right)
		}
	}
	return true, nil
}
// PredictSingle scores one feature vector: it adds the "global_bias" extra
// parameter to the sum of all tree outputs and squashes the total with
// sigmoid.
//
// The bias is stored as text in ExtraParams; the parse error is ignored, so a
// missing entry contributes 0. The "pred_transform" extra parameter is not
// consulted: sigmoid is always applied.
func (model *Model) PredictSingle(fvals []float64) (predictVal float64) {
	bias, _ := strconv.ParseFloat(model.ExtraParams["global_bias"], 64)
	margin := bias
	for i := range model.Trees {
		margin += model.Trees[i].GetHead().Predict(fvals)
	}
	return sigmoid(margin)
}
// sigmoid maps x to 1 / (1 + e^-x).
func sigmoid(x float64) (y float64) {
	y = 1.0 + math.Exp(-x)
	y = 1.0 / y
	return y
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,62 @@
syntax = "proto2";
package treelite;
// Model is a serialized tree ensemble.
message Model {
  repeated Tree trees = 1;
  optional int32 num_feature = 2;
  optional int32 num_output_group = 3;  // >1 for multi-class classification;
                                        // =1 for everything else
  optional bool random_forest_flag = 4; // true for random forest
                                        // false for gradient boosted trees
  map<string, string> extra_params = 5; // extra parameters
}
// Tree is a single decision tree, referenced through its root node.
message Tree {
  optional Node head = 1;  // Root node of the tree
}
// Node is one tree node: either a leaf carrying a value, or an internal node
// carrying a split and two children.
message Node {
  optional Node left_child = 1;   // Left child; missing if leaf
  optional Node right_child = 2;  // Right child; missing if leaf
  optional bool default_left = 3; // Default direction for missing values
                                  // true: default to left
                                  // false: default to right
  optional int32 split_index = 4; // Feature index used for the split;
                                  // missing if leaf
  enum SplitFeatureType {
    NUMERICAL = 0;
    CATEGORICAL = 1;
  }
  optional SplitFeatureType split_type = 5;
                                  // Type of feature used for the split
                                  // missing if leaf
  optional string op = 6;         // Operation used for comparison (e.g. "<")
                                  // of form [feature value] OP [threshold].
                                  // The left child is taken if the
                                  // expression evaluates to true; the right
                                  // child is taken otherwise.
                                  // missing if leaf or categorical split
  optional double threshold = 7;  // Decision threshold
                                  // missing if leaf or categorical split
  repeated uint32 left_categories = 8;
                                  // List of all categories belonging to
                                  // the left child. All other categories
                                  // will belong to the right child.
                                  // missing if leaf or numerical split
  optional double leaf_value = 9; // Leaf value; missing if non-leaf
                                  // also missing if leaf_vector field exists
  repeated double leaf_vector = 10; // Usually missing; only used for random
                                    // forests with multi-class classification
  optional uint64 data_count = 11;  // number of data points whose traversal
                                    // paths include this node. May be
                                    // omitted if unavailable
  optional double sum_hess = 12;    // sum of hessian values for all data
                                    // points whose traversal paths include
                                    // this node. This value is generally
                                    // correlated positively with the data
                                    // count. May be omitted if unavailable
  optional double gain = 13;        // change in loss that is attributed to
                                    // particular split; may be omitted if
                                    // unavailable
}

View File

@@ -0,0 +1,155 @@
package service
import (
"context"
"fmt"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/util"
"go-common/library/log"
"go-common/library/net/trace"
"strconv"
"strings"
"github.com/json-iterator/go"
)
// Start is an example entry point; it forwards the request unchanged to
// RecService and returns its result.
func (s *Service) Start(c context.Context, req *rpc.RecsysRequest) (res *rpc.RecsysResponse, err error) {
	res, err = s.RecService(c, req)
	return res, err
}
// RecService is the main recommendation pipeline.
//
// Steps: log the request, run the AB test, load the user profile (plus
// follow and black lists for logged-in users), recall candidates (falling
// back to the downgrade recall when recall fails or is empty), filter, rank,
// post-process, page, then store the results and the feature log.
//
// When req.DebugFlag is set the response additionally carries recall
// statistics and the serialized user profile, and is returned unpaged and
// without storing results or logs.
//
// The response is never nil: when nothing can be recommended an empty
// response with an "info" message is returned. err carries the error of the
// last failing step that was not recovered from (downgrade recall or post
// processing).
func (s *Service) RecService(c context.Context, req *rpc.RecsysRequest) (response *rpc.RecsysResponse, err error) {
	// request log
	data1, err := jsoniter.Marshal(req)
	if err == nil {
		log.Info("recsys request is %s:", data1)
	}

	// 0.0 pre process
	tracer, _ := trace.FromContext(c)
	req.TraceID = fmt.Sprintf("%s", tracer)

	// Responses coming back from recall are not guaranteed here to carry a
	// Message map; make sure one exists before writing into it.
	ensureMessage := func(r *rpc.RecsysResponse) {
		if r != nil && r.Message == nil {
			r.Message = make(map[string]string)
		}
	}

	// 0.1 ab test
	s.DoABTest(req)

	// 1.0 get user profile
	userProfile, err := s.dao.LoadUserProfile(c, req.MID, req.BUVID)
	if err != nil {
		log.Error("load user profile failed (%v)", err)
		err = nil
	}
	if userProfile == nil {
		// Keep the pipeline alive with an empty profile; later steps read
		// fields of the profile unconditionally.
		userProfile = &model.UserProfile{Mid: req.MID, Buvid: req.BUVID}
	}
	if req.MID != 0 {
		if err = s.dao.GetUserFollow(c, req.MID, userProfile); err != nil {
			log.Errorv(c, log.KV("userLog", "query user follow fail"), log.KV("MID", req.MID))
			err = nil
		}
		if err = s.dao.GetUserBlack(c, req.MID, userProfile); err != nil {
			log.Errorv(c, log.KV("userLog", "query user black fail"), log.KV("MID", req.MID))
			err = nil
		}
	}

	// 2.0 query rewrite, parallel retrieve
	response, err = s.recallManager.V2RetrieveFunc(c, req, userProfile, s.dao.RecallClient)

	// debug: recall statistics (only when recall produced a response)
	if req.DebugFlag && response != nil {
		ensureMessage(response)
		recallStatCountMap := make(map[string]int)
		recallTagStatCountMap := make(map[string]int)
		for index, record := range response.List {
			record.Map[model.OrderRecall] = strconv.Itoa(index)
			recallClasses := record.Map[model.RecallClasses]
			for _, recallClass := range strings.Split(recallClasses, "|") {
				recallStatCountMap[recallClass] = recallStatCountMap[recallClass] + 1
			}
			recallTags := record.Map[model.RecallTags]
			for _, recallTag := range strings.Split(recallTags, "|") {
				recallTagStatCountMap[recallTag] = recallTagStatCountMap[recallTag] + 1
			}
		}
		response.Message["DebugStatRecallCntTotal"] = strconv.Itoa(len(response.List))
		recallStatCountList := util.SortStrIntMapByValue(recallStatCountMap)
		recallCountStr, _ := jsoniter.MarshalToString(recallStatCountList)
		response.Message["DebugStatRecallCntDetail"] = recallCountStr
		recallTagStatCountList := util.SortStrIntMapByValue(recallTagStatCountMap)
		recallTagCountStr, _ := jsoniter.MarshalToString(recallTagStatCountList)
		response.Message["DebugStatRecallTagCntDetail"] = recallTagCountStr
	}

	// 2.1 down grade recall
	if err != nil || response == nil || len(response.List) == 0 {
		response, err = s.dao.DownGradeRecall(c)
	}
	if response == nil {
		// Both recall paths failed to produce a response.
		log.Error("recall produced no response (%v), request is (%v)", err, req)
		response = &rpc.RecsysResponse{
			Message: make(map[string]string),
			List:    make([]*rpc.RecsysRecord, 0),
		}
		response.Message["info"] = "response is empty!"
		return
	}
	ensureMessage(response)

	// 3.0 merge && filter
	s.filterManager.filter(req, response, userProfile)

	// 4.0 ranker
	// 4.0.0
	s.businessInfoCount.State(model.ResponseCount, int64(len(response.List)))
	response.Message[model.ResponseCount] = strconv.Itoa(len(response.List))
	// 4.0.1 prepare feature
	// 4.0.2 do rank
	// NOTE(review): the hard-coded MID is always routed to model ranking -
	// presumably a test account; confirm.
	if req.Abtest == ABTestA || req.MID == 5829468 {
		err = s.rankModelManager.DoRank(req, response, userProfile)
		if err != nil {
			// Fall back to the rule based ranker.
			log.Error("rank model failed (%v)", err)
			s.rankManager.rank(c, req, response, userProfile, s.dao)
			err = nil
		}
	} else {
		s.rankManager.rank(c, req, response, userProfile, s.dao)
	}

	// 5.0 post process, apply rule, page, store results
	// 5.1 post process
	err = s.postProcessor.ProcessRec(c, req, response, userProfile)

	size := len(response.List)
	if size == 0 {
		log.Error("response is empty! request is (%v)", req)
		response = &rpc.RecsysResponse{
			Message: make(map[string]string),
			List:    make([]*rpc.RecsysRecord, 0),
		}
		response.Message["info"] = "response is empty!"
		return
	}

	for index, record := range response.List {
		record.Map[model.OrderFinal] = strconv.Itoa(index)
	}

	// debug log
	if req.DebugFlag {
		data, _ := jsoniter.Marshal(userProfile)
		response.Message["UserInfo"] = string(data)
		return
	}

	// 5.2 page; clamp the limit to [0, size] so the slice expression is
	// always valid.
	limit := int(req.Limit)
	if limit < 0 {
		limit = 0
	}
	if limit > size {
		limit = size
	}
	response.List = response.List[0:limit]

	// 5.3 store results
	s.dao.StoreRecResults(c, userProfile, req.MID, req.BUVID, response, s.dao.LastPageRedisKey, userProfile.LastRecords)

	// 5.4 store feature log && reduce record keys
	s.StoreLog(req, response, userProfile, "bbq-recsys")

	return
}

View File

@@ -0,0 +1,183 @@
package service
import (
"context"
"fmt"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/retrieve"
"go-common/app/service/bbq/recsys/service/util"
"go-common/library/log"
"go-common/library/net/trace"
"math"
"sort"
"strconv"
"strings"
)
// RelatedRecService recommends items related to the one named in the
// request.
//
// Steps: recall related candidates, filter them, score and sort them with
// rankRelated, cut the list to req.Limit and store the feature log. The user
// profile only carries the request's MID and BUVID; no stored profile is
// loaded.
//
// The response is never nil: when recall yields nothing an empty response
// with an "info" message is returned. err is whatever the recall step
// returned.
func (s *Service) RelatedRecService(c context.Context, req *rpc.RecsysRequest) (response *rpc.RecsysResponse, err error) {
	// 0.0 pre process: ab test
	// 0.0 pre process
	tracer, _ := trace.FromContext(c)
	req.TraceID = fmt.Sprintf("%s", tracer)

	// 1.0 get user profile
	userProfile := &model.UserProfile{Mid: req.MID, Buvid: req.BUVID}
	//if userProfile, err = s.dao.GetUserProfile(c, req.MID, req.BUVID); err != nil {
	//	log.Warn("get user profile failed, mid: ", req.MID)
	//}

	// 2.0 query rewrite, retrieve
	response, err = s.recallManager.RelatedRec(c, req, s.dao.RecallClient)
	if response == nil {
		// The steps below dereference the response; bail out with an empty
		// one when recall produced nothing at all.
		log.Error("Related_response recall failed (%v)", err)
		response = &rpc.RecsysResponse{
			Message: make(map[string]string),
			List:    make([]*rpc.RecsysRecord, 0),
		}
		response.Message["info"] = "Related_response is empty!"
		return
	}

	// 3.0 filter
	s.filterManager.relatedFilter(req, response, userProfile)
	//fmt.Println("response size after filter:", len(response.List))

	// 4.0 ranker
	// 4.0.0
	// 4.0.1 prepare feature
	// 4.0.2 do rank
	rankRelated(response)

	// 5.0 post process, apply rule, page, store results
	//s.postProcessor.process(c, response)
	size := len(response.List)
	if size == 0 {
		log.Error("Related_response is empty!")
		response = &rpc.RecsysResponse{
			Message: make(map[string]string),
			List:    make([]*rpc.RecsysRecord, 0),
		}
		response.Message["info"] = "Related_response is empty!"
		return
	}

	// 5.2 page; clamp the limit to [0, size] so the slice expression is
	// always valid.
	limit := int(req.Limit)
	if limit < 0 {
		limit = 0
	}
	if limit > size {
		limit = size
	}
	response.List = response.List[0:limit]

	s.StoreLog(req, response, userProfile, "relatedrec")
	return
}
// rankRelated scores every record of response.List for the related-video
// feed and sorts the list by descending score.
//
// The score is the sum of
//   - a base value chosen by the record's retriever,
//   - 0.5 when one of the record's tags equals the source zone id,
//   - a tag overlap score (0.5 for any shared tag plus 0.5 scaled by the
//     share of common tags),
//   - a bonus for state 5 (0.3) or state 4 (0.1),
//   - a log-scaled play count and weighted, capped interaction ratios, once
//     for the hive counters and once for the month counters.
//
// Records whose retriever is not listed keep whatever Score they already
// had as their base. Empty tag ids never count as a match.
func rankRelated(response *rpc.RecsysResponse) {
	// playScore maps a play count to [0, 1] on a log scale that saturates at
	// one million plays.
	playScore := func(playNum float64) float64 {
		return math.Log10(math.Min(playNum+1.0, 1000000.0)) / math.Log10(1000000.0)
	}
	// ratioScore is the smoothed interaction/play ratio, capped at 0.1. The
	// interaction count is clamped to the play count; an unparsable count
	// reads as 0.
	ratioScore := func(raw string, playNum float64) float64 {
		num, _ := strconv.ParseFloat(raw, 64)
		return math.Min((math.Min(num, playNum)+1.0)/(playNum+200.0), 0.1)
	}
	type weightedKey struct {
		key    string
		weight float64
	}
	hiveRatios := []weightedKey{
		{model.FavHive, 5},
		{model.LikesHive, 5},
		{model.ShareHive, 5},
	}
	monthRatios := []weightedKey{
		{model.LikesMonth, 5},
		{model.ShareMonth, 10},
		{model.ReplyMonth, 10},
	}

	sourceZoneID := response.Message[retrieve.SourceZoneID]
	sourceTagIDs := strings.Split(response.Message[retrieve.SourceTagIDs], "|")
	for _, record := range response.List {
		// NOTE(review): the original chain tested retrieve.I2tag2iRecall a
		// second time with a score of 0.5; that branch could never run. The
		// retriever it was meant for is not visible here - confirm and add it.
		retriever := record.Map[model.Retriever]
		if retriever == retrieve.I2iRecall {
			record.Score = 1.2
		} else if retriever == retrieve.I2tag2iRecall {
			record.Score = 1.0
		} else if retriever == retrieve.HotRecall {
			record.Score = 0
		}

		zoneScore := 0.0
		tagCount := 0
		tagCommonCount := 0
		if itemTagIDs, ok := record.Map[model.TagsID]; ok {
			tagIDs := strings.Split(itemTagIDs, "|")
			tagCount = len(tagIDs)
			for _, tagID := range tagIDs {
				// Splitting an empty string yields one empty id; it must not
				// match an empty source zone or source tag.
				if tagID == "" {
					continue
				}
				if tagID == sourceZoneID {
					zoneScore = 0.5
				}
				for _, sourceTagID := range sourceTagIDs {
					if tagID == sourceTagID {
						tagCommonCount++
					}
				}
			}
		}
		record.Score += zoneScore

		tagScore := 0.0
		if tagCommonCount > 0 {
			tagScore += 0.5
		}
		tagScore += 0.5 * (float64(tagCommonCount) + 1) / (float64(tagCount) + 1)
		// The overlap score was computed but never applied before.
		record.Score += tagScore

		state, _ := strconv.ParseInt(record.Map[model.State], 10, 64)
		if state == model.State5 {
			record.Score += 0.3
		} else if state == model.State4 {
			record.Score += 0.1
		}

		// hive counters; ratios only count when a play count is present
		if play, ok := record.Map[model.PlayHive]; ok {
			playNum, _ := strconv.ParseFloat(play, 64)
			record.Score += playScore(playNum)
			for _, item := range hiveRatios {
				if raw, ok := record.Map[item.key]; ok {
					record.Score += item.weight * ratioScore(raw, playNum)
				}
			}
		}

		// bbq video feature (month counters)
		if play, ok := record.Map[model.PlayMonth]; ok {
			playNum, _ := strconv.ParseFloat(play, 64)
			record.Score += playScore(playNum)
			for _, item := range monthRatios {
				if raw, ok := record.Map[item.key]; ok {
					record.Score += item.weight * ratioScore(raw, playNum)
				}
			}
		}
	}
	sort.Sort(sort.Reverse(util.Records(response.List)))
}

View File

@@ -0,0 +1,44 @@
# Bazel build definition for go-common/app/service/bbq/recsys/service/retrieve.
# Every target in this package is publicly visible unless it overrides visibility.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library compiled from the retrieve package sources listed in srcs.
go_library(
name = "go_default_library",
srcs = [
"merge.go",
"relatedrecretrieve.go",
"retrieve.go",
"retrieve2.go",
"upsrecretrive.go",
],
importpath = "go-common/app/service/bbq/recsys/service/retrieve",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/api/grpc/v1:go_default_library",
"//app/service/bbq/recsys/model:go_default_library",
"//app/service/bbq/recsys/service/util:go_default_library",
"//app/service/bbq/search/api/grpc/v1:go_default_library",
"//app/service/main/relation/api:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/json-iterator/go:go_default_library",
],
)
# Every file in this directory; private to this package.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public alias of package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,170 @@
package retrieve
import (
"fmt"
recallv1 "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/library/log"
"strconv"
"strings"
)
// deleteBlack removes from response.List every record whose uploader MID
// (record.Map[model.UperMid]) is a key of userProfile.BBQBlack. The surviving
// records are stored in a freshly allocated slice. The returned error is
// always nil.
func deleteBlack(response *recsys.RecsysResponse, userProfile *model.UserProfile) (err error) {
	kept := make([]*recsys.RecsysRecord, 0)
	for _, item := range response.List {
		// Parse errors are ignored on purpose: whatever value ParseInt
		// returns alongside the error is used for the lookup.
		uploader, _ := strconv.ParseInt(item.Map[model.UperMid], 10, 64)
		if _, blocked := userProfile.BBQBlack[uploader]; !blocked {
			kept = append(kept, item)
		}
	}
	response.List = kept
	return nil
}
// transform converts a recall-service response into recsys records and
// appends them to response.List.
//
// It is a no-op for a nil recallResponse. Otherwise it writes the formatted
// SrcInfo into response.Message (which must therefore be a non-nil map) and
// emits one record per recalled video. Videos without a forward index or
// basic info are logged and skipped. The returned error is always nil.
func transform(recallResponse *recallv1.RecallResponse, response *recsys.RecsysResponse) (err error) {
	if recallResponse == nil {
		return nil
	}
	response.Message[model.ResponseRecallStat] = fmt.Sprintf("%v", recallResponse.SrcInfo)

	for order, video := range recallResponse.List {
		forward := video.ForwardIndex
		if forward == nil || forward.BasicInfo == nil {
			log.Error("recall forward index null, svid: %v", video.SVID)
			continue
		}
		basic := forward.BasicInfo
		fields := make(map[string]string)

		// Basic video information.
		fields[model.Title] = basic.Title
		fields[model.Content] = basic.Content
		fields[model.AVID] = strconv.Itoa(int(basic.AVID))
		fields[model.CID] = strconv.Itoa(int(basic.CID))
		fields[model.State] = strconv.Itoa(int(basic.State))
		fields[model.UperMid] = strconv.Itoa(int(basic.MID))
		fields[model.PubTime] = strconv.Itoa(int(basic.PubTime))
		fields[model.Duration] = strconv.Itoa(int(basic.Duration))

		// Tags are flattened into three parallel "|"-joined lists; a tag of
		// type 2 additionally provides the zone ID and zone name.
		names := make([]string, 0)
		kinds := make([]string, 0)
		ids := make([]string, 0)
		for _, tag := range basic.Tags {
			id := strconv.Itoa(int(tag.TagID))
			names = append(names, tag.TagName)
			kinds = append(kinds, strconv.Itoa(int(tag.TagType)))
			ids = append(ids, id)
			if tag.TagType == 2 {
				fields[model.ZoneID] = id
				fields[model.ZoneName] = tag.TagName
			}
		}
		fields[model.TagsName] = strings.Join(names, "|")
		fields[model.TagsType] = strings.Join(kinds, "|")
		fields[model.TagsID] = strings.Join(ids, "|")

		// Video quality counters (optional section).
		if quality := forward.VideoQuality; quality != nil {
			// bili counters: QualityInfoMsM.
			fields[model.PlayHive] = strconv.Itoa(int(quality.QualityInfoMsM.PlayCnt))
			fields[model.FavHive] = strconv.Itoa(int(quality.QualityInfoMsM.FavCnt))
			fields[model.LikesHive] = strconv.Itoa(int(quality.QualityInfoMsM.LikeCnt))
			fields[model.CoinHive] = strconv.Itoa(int(quality.QualityInfoMsM.CoinCnt))
			fields[model.ReplyHive] = strconv.Itoa(int(quality.QualityInfoMsM.CommentAddCnt))
			fields[model.DanmuHive] = strconv.Itoa(int(quality.QualityInfoMsM.DanmuCnt))
			fields[model.ShareHive] = strconv.Itoa(int(quality.QualityInfoMsM.ShareCnt))

			// bili counters: QualityInfoMsW.
			fields[model.PlayWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.PlayCnt))
			fields[model.LikesWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.LikeCnt))
			fields[model.ReplyWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.CommentAddCnt))
			fields[model.DanmuWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.DanmuCnt))
			fields[model.ShareWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.ShareCnt))
			fields[model.FavWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.FavCnt))
			fields[model.CoinWeekBili] = strconv.Itoa(int(quality.QualityInfoMsW.CoinCnt))

			// bili counters: QualityInfoMsH.
			fields[model.PlayDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.PlayCnt))
			fields[model.LikesDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.LikeCnt))
			fields[model.ReplyDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.CommentAddCnt))
			fields[model.DanmuDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.DanmuCnt))
			fields[model.ShareDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.ShareCnt))
			fields[model.FavDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.FavCnt))
			fields[model.CoinDayBili] = strconv.Itoa(int(quality.QualityInfoMsH.CoinCnt))

			// bbq counters: QualityInfoM. The CommentLikeCnt and
			// CommentReportCnt counters are intentionally not exported.
			fields[model.PlayMonthTotal] = strconv.Itoa(int(quality.QualityInfoM.ImpCnt))
			fields[model.PlayMonthFinish] = strconv.Itoa(int(quality.QualityInfoM.AbsolutePlayCnt))
			fields[model.PlayMonth] = strconv.Itoa(int(quality.QualityInfoM.PlayCnt))
			fields[model.FavMonth] = strconv.Itoa(int(quality.QualityInfoM.FavCnt))
			fields[model.LikesMonth] = strconv.Itoa(int(quality.QualityInfoM.LikeCnt))
			fields[model.ReplyMonth] = strconv.Itoa(int(quality.QualityInfoM.CommentAddCnt))
			fields[model.DanmuMonth] = strconv.Itoa(int(quality.QualityInfoM.DanmuCnt))
			fields[model.ShareMonth] = strconv.Itoa(int(quality.QualityInfoM.ShareCnt))

			// bbq counters: QualityInfoW.
			fields[model.PlayWeekFinish] = strconv.Itoa(int(quality.QualityInfoW.AbsolutePlayCnt))
			fields[model.PlayWeek] = strconv.Itoa(int(quality.QualityInfoW.PlayCnt))
			fields[model.LikesWeek] = strconv.Itoa(int(quality.QualityInfoW.LikeCnt))
			fields[model.ReplyWeek] = strconv.Itoa(int(quality.QualityInfoW.CommentAddCnt))
			fields[model.DanmuWeek] = strconv.Itoa(int(quality.QualityInfoW.DanmuCnt))
			fields[model.ShareWeek] = strconv.Itoa(int(quality.QualityInfoW.ShareCnt))

			// bbq counters: QualityInfoH.
			fields[model.PlayDayFinish] = strconv.Itoa(int(quality.QualityInfoH.AbsolutePlayCnt))
			fields[model.PlayDay] = strconv.Itoa(int(quality.QualityInfoH.PlayCnt))
			fields[model.LikesDay] = strconv.Itoa(int(quality.QualityInfoH.LikeCnt))
			fields[model.ReplyDay] = strconv.Itoa(int(quality.QualityInfoH.CommentAddCnt))
			fields[model.DanmuDay] = strconv.Itoa(int(quality.QualityInfoH.DanmuCnt))
			fields[model.ShareDay] = strconv.Itoa(int(quality.QualityInfoH.ShareCnt))
		}

		// Recall information: score, position in the recall list, and the
		// inverted indexes that produced the hit.
		fields[model.RecallScore] = strconv.FormatFloat(float64(video.Score), 'f', -1, 32)
		fields[model.RecallOrder] = strconv.Itoa(order)
		hitTags := make([]string, 0)
		hitClasses := make([]string, 0)
		for _, inverted := range video.InvertedIndexes {
			hitTags = append(hitTags, inverted.Index)
			hitClasses = append(hitClasses, inverted.Name)
		}
		fields[model.RecallTags] = strings.Join(hitTags, "|")
		fields[model.RecallClasses] = strings.Join(hitClasses, "|")

		response.List = append(response.List, &recsys.RecsysRecord{
			Svid:  video.SVID,
			Score: 0,
			Map:   fields,
		})
	}
	return nil
}
// mergeRecallKey collapses recall infos that share the same Tag into one
// entry per tag.
//
// For each tag the returned entry is the last RecallInfo seen with that tag
// (so its Limit, Filter and other fields win), with two fields rewritten:
//   - Name becomes the "|"-joined names of every info carrying the tag, in
//     input order;
//   - Priority becomes the highest priority among those infos.
//
// Entries are returned in the order their tag first appears in recallInfos,
// so the output is deterministic. The retained *RecallInfo values are
// modified in place.
func mergeRecallKey(recallInfos []*recallv1.RecallInfo) (newRecallInfos []*recallv1.RecallInfo) {
	type mergedInfo struct {
		info     *recallv1.RecallInfo
		names    []string
		priority int32
	}

	merged := make(map[string]*mergedInfo, len(recallInfos))
	tagOrder := make([]string, 0, len(recallInfos))
	for _, recallInfo := range recallInfos {
		entry, seen := merged[recallInfo.Tag]
		if !seen {
			// First occurrence of the tag seeds the priority with the
			// info's own value (not the zero value).
			entry = &mergedInfo{priority: recallInfo.Priority}
			merged[recallInfo.Tag] = entry
			tagOrder = append(tagOrder, recallInfo.Tag)
		} else if recallInfo.Priority > entry.priority {
			entry.priority = recallInfo.Priority
		}
		entry.names = append(entry.names, recallInfo.Name)
		entry.info = recallInfo // the last info for a tag is the one kept
	}

	newRecallInfos = make([]*recallv1.RecallInfo, 0, len(tagOrder))
	for _, tag := range tagOrder {
		entry := merged[tag]
		entry.info.Name = strings.Join(entry.names, "|")
		entry.info.Priority = entry.priority
		newRecallInfos = append(newRecallInfos, entry.info)
	}
	return newRecallInfos
}

View File

@@ -0,0 +1,140 @@
package retrieve
import (
"context"
"fmt"
"go-common/library/log"
recallv1 "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"strconv"
"strings"
)
// Recall strategies: values RelatedRec puts into RecallInfo.Name.
const (
I2iRecall = "I2iRecall"
I2Zone2iRecall = "I2Zone2iRecall"
I2tag2iRecall = "I2tag2iRecall"
)
// Format strings for RecallInfo.Tag; RelatedRec fills %d with an SVID
// (_recRecalli2i) or with a tag/zone ID (_i2tag2iRecall).
const (
_recRecalli2i = "RECALL:I2I:%d"
_i2tag2iRecall = "RECALL:HOT_T:%d"
)
// Source video info: keys RelatedRec writes into RecsysResponse.Message.
const (
SourceTagIDs = "SourceTagIDs"
SourceZoneID = "SourceZoneID"
SourceUpMID = "SourceUpMID"
)
// RelatedRec recalls videos related to request.SVID.
//
// It looks up the source video's zone, tags and uploader via tagIDget, then
// issues a single recall request combining, in this order: item-to-item
// recall for the SVID, one tag recall per source tag, a zone recall and the
// default hot pool. The source zone, tags and uploader are echoed into
// response.Message when non-empty. If the recall call fails or yields a nil
// response, the error is logged and the response (without records) is
// returned together with err.
func (m *RecallManager) RelatedRec(c context.Context, request *recsys.RecsysRequest, recallClient recallv1.RecsysRecallClient) (response *recsys.RecsysResponse, err error) {
	zoneID, tagIDs, upMID := tagIDget(c, recallClient, request.SVID)

	infos := []*recallv1.RecallInfo{
		&recallv1.RecallInfo{
			Name:     I2iRecall,
			Tag:      fmt.Sprintf(_recRecalli2i, request.SVID),
			Limit:    50,
			Filter:   "",
			Priority: 4,
		},
	}
	for _, id := range tagIDs {
		infos = append(infos, &recallv1.RecallInfo{
			Name:     I2tag2iRecall,
			Tag:      fmt.Sprintf(_i2tag2iRecall, id),
			Limit:    100,
			Filter:   "",
			Priority: 3,
		})
	}
	infos = append(infos,
		// Zone recall reuses the tag key template with the zone ID.
		&recallv1.RecallInfo{
			Name:     I2Zone2iRecall,
			Tag:      fmt.Sprintf(_i2tag2iRecall, zoneID),
			Limit:    100,
			Filter:   "",
			Priority: 2,
		},
		// Default hot pool, lowest priority.
		&recallv1.RecallInfo{
			Name:     HotRecall,
			Tag:      RecallHotDefault,
			Limit:    50,
			Filter:   "",
			Priority: 1,
		},
	)

	recallRequest := &recallv1.RecallRequest{
		MID:        request.MID,
		BUVID:      request.BUVID,
		TotalLimit: 100,
		Infos:      infos,
	}
	log.Info("recall request: (%v)", recallRequest)

	response = &recsys.RecsysResponse{Message: make(map[string]string)}
	recallResponse, err := recallClient.Recall(c, recallRequest)

	// Source video info is reported even when the recall itself failed.
	if zoneID != 0 {
		response.Message[SourceZoneID] = strconv.Itoa(int(zoneID))
	}
	if len(tagIDs) > 0 {
		var parts []string
		for _, id := range tagIDs {
			parts = append(parts, strconv.Itoa(int(id)))
		}
		response.Message[SourceTagIDs] = strings.Join(parts, "|")
	}
	if upMID != 0 {
		response.Message[SourceUpMID] = strconv.Itoa(int(upMID))
	}

	if err != nil || recallResponse == nil {
		log.Error("recall service error (%v) or recall response is null", err)
		return response, err
	}
	err = transform(recallResponse, response)
	return response, err
}
// tagIDget fetches the forward index of the video SVID from the recall
// service and extracts its zone ID (tag type 2), its tag IDs (tag type 3) and
// its uploader MID.
//
// On a service error or nil response the error is logged and zero values are
// returned. Forward indexes without basic info are logged and skipped rather
// than dereferenced, matching the guard transform applies to the same field.
func tagIDget(c context.Context, recallClient recallv1.RecsysRecallClient, SVID int64) (zoneID int64, tagIDs []int64, upMID int64) {
	videoIndexRequest := &recallv1.VideoIndexRequest{
		SVIDs: []int64{SVID},
	}
	videoIndexResponse, err := recallClient.VideoIndex(c, videoIndexRequest)
	if err != nil || videoIndexResponse == nil {
		log.Error("recall service VideoIndex error (%v) or recall response is null", err)
		return
	}
	for _, forwardIndex := range videoIndexResponse.List {
		if forwardIndex.BasicInfo == nil {
			log.Error("recall forward index basic info null, svid: %v", SVID)
			continue
		}
		for _, tag := range forwardIndex.BasicInfo.Tags {
			switch tag.TagType {
			case 2:
				zoneID = int64(tag.TagID)
			case 3:
				tagIDs = append(tagIDs, int64(tag.TagID))
			}
		}
		upMID = int64(forwardIndex.BasicInfo.MID)
	}
	return
}

View File

@@ -0,0 +1,543 @@
package retrieve
import (
"context"
"github.com/json-iterator/go"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
searchv1 "go-common/app/service/bbq/search/api/grpc/v1"
"go-common/library/log"
"strconv"
"strings"
)
// Recall strategies: the names returned by the Retriever implementations in
// this file (hot, selection, relevant, tag).
const (
Hot = "hot"
Selection = "selection"
Relevant = "relevant"
Tag = "tag"
)
// RetrieverManager manages multiple retrieve functions.
// NewRetrieverManager fills Retrievers and PostRetrievers and sets
// RetrieveFunc to DefaultRetrieveFunc; PreRetrievers starts empty.
type RetrieverManager struct {
// Retrievers is the main list of retrievers.
Retrievers []Retriever
// PreRetrievers presumably run before Retrievers; usage is not shown here.
PreRetrievers []Retriever
// PostRetrievers presumably run after Retrievers; usage is not shown here.
PostRetrievers []Retriever
// RetrieveFunc executes a single Retriever against the search service.
RetrieveFunc RetrieverFunc
}
// NewRetrieverManager builds a RetrieverManager that uses DefaultRetrieveFunc.
// Retrievers holds the hot, tag, operation and relevant retrievers in that
// order; PostRetrievers holds the same hot retriever instance; PreRetrievers
// is empty.
func NewRetrieverManager() (m *RetrieverManager) {
	hot := &hotRetriever{}
	return &RetrieverManager{
		Retrievers: []Retriever{
			hot,
			&tagRetriever{},
			&operationRetriever{},
			&relevantRetriever{},
		},
		PreRetrievers:  make([]Retriever, 0),
		PostRetrievers: []Retriever{hot},
		RetrieveFunc:   DefaultRetrieveFunc,
	}
}
// Retrievers pairs a method name with the function that performs the
// retrieval. It is embedded by ColdStartRetriever.
type Retrievers struct {
// MethodName is the name of the retrieval method.
MethodName string
// Retrieve is the function that carries out the retrieval.
Retrieve RetrieverFunc
}
// RetrieverFunc runs retriever r for request and userProfile against
// searchClient and delivers its result on the response channel instead of
// returning it. DefaultRetrieveFunc is the implementation in this file.
type RetrieverFunc func(c context.Context, r Retriever, searchClient searchv1.SearchClient, request rpc.RecsysRequest, userProfile *model.UserProfile, response chan rpc.RecsysResponse)
// ColdStartRetriever embeds Retrievers and adds no fields of its own.
// NOTE(review): presumably intended for users without history; nothing in
// this file uses it yet, so confirm before relying on it.
type ColdStartRetriever struct {
Retrievers
}
// Merge de-duplicates response.List, keeping the first occurrence of each
// record and preserving order. A record survives only if its Svid has not
// been seen before, it carries a model.AVID entry in Map, and that AVID
// (parsed as base-10 int64, parse errors ignored) has not been seen on an
// earlier surviving-by-Svid record. Records without an AVID entry are
// dropped. The result is stored in a newly allocated slice.
func (m *RetrieverManager) Merge(response *rpc.RecsysResponse) {
	seenSvid := make(map[int64]struct{})
	seenAvid := make(map[int64]struct{})
	unique := make([]*rpc.RecsysRecord, 0)
	for _, item := range response.List {
		// Stage 1: unique by Svid.
		if _, dup := seenSvid[item.Svid]; dup {
			continue
		}
		seenSvid[item.Svid] = struct{}{}

		// Stage 2: unique by AVID among the Svid-unique records.
		rawAvid, hasAvid := item.Map[model.AVID]
		if !hasAvid {
			continue
		}
		avid, _ := strconv.ParseInt(rawAvid, 10, 64)
		if _, dup := seenAvid[avid]; dup {
			continue
		}
		seenAvid[avid] = struct{}{}
		unique = append(unique, item)
	}
	response.List = unique
}
// Retriever is one recall strategy executed through a RetrieverFunc.
type Retriever interface {
// name returns the strategy name that is stamped on each record under
// model.Retriever.
name() (name string)
// queryRewrite builds the search request for this strategy. The
// implementations in this file return a nil request with a nil error when
// the user profile gives them nothing to query.
queryRewrite(c context.Context, request rpc.RecsysRequest, userProfile *model.UserProfile) (req *searchv1.RecVideoDataRequest, err error)
}
// Query is the body that gets JSON-encoded into RecVideoDataRequest.Query.
type Query struct {
// Calc holds the scoring weights.
Calc *Calc `json:"calc"`
// Where holds the inclusion and exclusion conditions.
Where *Where `json:"where"`
// Filter carries the requester's "buvid" and "mid" in this file.
Filter map[string]interface{} `json:"filter"`
// From and Size are always set to 0 and 100 by the builders in this file.
From int `json:"from"`
Size int `json:"size"`
}
// Calc carries the per-signal ratios sent to the search service under
// "calc". Every builder in this file sends the same values.
// NOTE(review): Open presumably switches the scoring on (always 1 here) —
// confirm against the search service.
type Calc struct {
Open int64 `json:"open"`
PlayRatio float64 `json:"play_ratio"`
FavRatio float64 `json:"fav_ratio"`
LikeRatio float64 `json:"like_ratio"`
PubRatio float64 `json:"pub_ratio"`
CoinRatio float64 `json:"coin_ratio"`
ReplyRatio float64 `json:"reply_ratio"`
ShareRatio float64 `json:"share_ratio"`
}
// Where lists, per field name, the values a document must match (In) and
// the values it must not match (NotIn).
type Where struct {
In map[string][]interface{} `json:"in"`
NotIn map[string][]interface{} `json:"not_in"`
}
// DefaultRetrieveFunc is the default RetrieverFunc. It always sends exactly
// one RecsysResponse on the response channel.
//
// Flow:
//  1. r.queryRewrite builds the search request. On error, or when the request
//     is nil, an empty response is sent.
//  2. The search result is converted into records stamped with r.name().
//  3. For every retriever except Relevant those records are the answer.
//  4. For Relevant the records are only used to count tags of the user's
//     liked/positive/negative videos. The counts are stored on userProfile,
//     and a second search is issued for videos carrying any positive tag while
//     excluding already-seen (DedupVideos), positive and negative videos. The
//     records of that second search are the answer; an empty response is sent
//     if there is no positive tag or the second search fails.
func DefaultRetrieveFunc(c context.Context, r Retriever, searchClient searchv1.SearchClient, request rpc.RecsysRequest, userProfile *model.UserProfile, response chan rpc.RecsysResponse) {
	result := rpc.RecsysResponse{}
	req, err := r.queryRewrite(c, request, userProfile)
	if err != nil {
		log.Error("query rewrite error: (%v)", err)
		response <- result
		return
	}
	if req == nil {
		response <- result
		return
	}
	res, err := searchClient.RecVideoData(c, req)
	if err != nil {
		log.Error("Retrieve error: (%v)", err)
		response <- result
		return
	}

	// toRecord converts one search hit into a record tagged with this retriever.
	toRecord := func(info *searchv1.RecVideoInfo) *rpc.RecsysRecord {
		record := &rpc.RecsysRecord{Map: make(map[string]string)}
		fillRecord(record, info)
		record.Map[model.Retriever] = r.name()
		return record
	}
	for _, videoEsInfo := range res.List {
		result.List = append(result.List, toRecord(videoEsInfo))
	}
	if r.name() != Relevant {
		response <- result
		return
	}

	// Count how often each tag occurs on the user's liked, positive and
	// negative videos.
	likeTags := make(map[string]float64)
	posTags := make(map[string]float64)
	negTags := make(map[string]float64)
	bump := func(counts map[string]float64, tags []string) {
		for _, tag := range tags {
			counts[tag]++
		}
	}
	for _, record := range result.List {
		itemTags, ok := record.Map[model.TagsName]
		if !ok {
			continue
		}
		tags := strings.Split(itemTags, "|")
		if _, ok := userProfile.LikeVideos[record.Svid]; ok {
			bump(likeTags, tags)
		}
		if _, ok := userProfile.PosVideos[record.Svid]; ok {
			bump(posTags, tags)
		}
		if _, ok := userProfile.NegVideos[record.Svid]; ok {
			bump(negTags, tags)
		}
	}
	userProfile.LikeTags = likeTags
	userProfile.PosTags = posTags
	userProfile.NegTags = negTags

	calc := &Calc{
		Open:       1,
		PlayRatio:  0.3,
		FavRatio:   0.05,
		LikeRatio:  0.15,
		PubRatio:   0.1,
		ShareRatio: 0.1,
		CoinRatio:  0.1,
		ReplyRatio: 0.2,
	}
	where := &Where{
		In:    make(map[string][]interface{}),
		NotIn: make(map[string][]interface{}),
	}
	for _, id := range userProfile.DedupVideos {
		where.NotIn[model.CID] = append(where.NotIn[model.CID], id)
	}
	// PosVideos and NegVideos are keyed by SVID, so the keys (not the
	// values) are what must be excluded.
	for id := range userProfile.PosVideos {
		where.NotIn[model.SVID] = append(where.NotIn[model.SVID], id)
	}
	for id := range userProfile.NegVideos {
		where.NotIn[model.SVID] = append(where.NotIn[model.SVID], id)
	}
	hasTag := false
	for tag, score := range posTags {
		if score > 0 {
			where.In[model.TagsName] = append(where.In[model.TagsName], tag)
			hasTag = true
		}
	}
	if !hasTag {
		log.Error("Relevant videos has no tag: ")
		response <- rpc.RecsysResponse{}
		return
	}

	filter := make(map[string]interface{})
	filter["buvid"] = request.BUVID
	filter["mid"] = request.MID
	query := Query{
		Calc:   calc,
		Where:  where,
		Filter: filter,
		From:   0,
		Size:   100,
	}
	queryBody, err := jsoniter.Marshal(query)
	if err != nil {
		log.Error("marshal %s query error: (%v)", r.name(), err)
		response <- rpc.RecsysResponse{}
		return
	}
	log.Info("%s query: %s", r.name(), string(queryBody))
	req = &searchv1.RecVideoDataRequest{Query: string(queryBody)}
	res, err = searchClient.RecVideoData(c, req)
	if err != nil {
		log.Error("Retrieve error: (%v)", err)
		response <- rpc.RecsysResponse{}
		return
	}

	// The second search replaces the records of the first one.
	result = rpc.RecsysResponse{}
	for _, videoEsInfo := range res.List {
		result.List = append(result.List, toRecord(videoEsInfo))
	}
	response <- result
}
// hotRetriever is the Retriever for the Hot strategy.
type hotRetriever struct {
	Retriever
}

// name reports the strategy name, Hot.
func (r *hotRetriever) name() (name string) {
	return Hot
}
// queryRewrite builds the hot-video search request: videos in state 1, 0, 3,
// 4 or 5 whose CID is not in userProfile.DedupVideos, filtered by the
// requester's BUVID and MID, first 100 hits. A JSON encoding failure is
// returned as err with a nil request.
func (r *hotRetriever) queryRewrite(c context.Context, request rpc.RecsysRequest, userProfile *model.UserProfile) (req *searchv1.RecVideoDataRequest, err error) {
	calc := &Calc{
		Open:       1,
		PlayRatio:  0.3,
		FavRatio:   0.05,
		LikeRatio:  0.15,
		PubRatio:   0.1,
		ShareRatio: 0.1,
		CoinRatio:  0.1,
		ReplyRatio: 0.2,
	}
	where := &Where{
		In:    make(map[string][]interface{}),
		NotIn: make(map[string][]interface{}),
	}
	where.In[model.State] = append(where.In[model.State], model.State1, model.State0, model.State3, model.State4, model.State5)
	for _, id := range userProfile.DedupVideos {
		where.NotIn[model.CID] = append(where.NotIn[model.CID], id)
	}
	filter := make(map[string]interface{})
	filter["buvid"] = request.BUVID
	filter["mid"] = request.MID
	query := Query{
		Calc:   calc,
		Where:  where,
		Filter: filter,
		From:   0,
		Size:   100,
	}
	queryBody, err := jsoniter.Marshal(query)
	if err != nil {
		return nil, err
	}
	// The retriever name is passed as an argument, never as the format string.
	log.Info("%s query: %s", r.name(), string(queryBody))
	return &searchv1.RecVideoDataRequest{Query: string(queryBody)}, nil
}
// tagRetriever is the Retriever for the Tag strategy.
type tagRetriever struct {
	Retriever
}

// name reports the strategy name, Tag.
func (r *tagRetriever) name() (name string) {
	return Tag
}
// queryRewrite builds the tag-based search request from the keys of
// userProfile.BiliTags, Zones1 and Zones2, each parsed as a base-10 tag ID.
// It returns (nil, nil) when the profile has none of them. Matching videos
// must be in state 1, 0, 3, 4 or 5 and must not have a CID listed in
// userProfile.DedupVideos. A JSON encoding failure is returned as err with a
// nil request.
func (r *tagRetriever) queryRewrite(c context.Context, request rpc.RecsysRequest, userProfile *model.UserProfile) (req *searchv1.RecVideoDataRequest, err error) {
	if len(userProfile.BiliTags) == 0 && len(userProfile.Zones1) == 0 && len(userProfile.Zones2) == 0 {
		return nil, nil
	}
	calc := &Calc{
		Open:       1,
		PlayRatio:  0.3,
		FavRatio:   0.05,
		LikeRatio:  0.15,
		PubRatio:   0.1,
		ShareRatio: 0.1,
		CoinRatio:  0.1,
		ReplyRatio: 0.2,
	}
	where := &Where{
		In:    make(map[string][]interface{}),
		NotIn: make(map[string][]interface{}),
	}
	where.In[model.State] = append(where.In[model.State], model.State1, model.State0, model.State3, model.State4, model.State5)
	// Keys that fail to parse contribute tag ID 0, as before.
	for tag := range userProfile.BiliTags {
		tagID, _ := strconv.ParseInt(tag, 10, 64)
		where.In[model.TagsID] = append(where.In[model.TagsID], tagID)
	}
	for tag := range userProfile.Zones1 {
		tagID, _ := strconv.ParseInt(tag, 10, 64)
		where.In[model.TagsID] = append(where.In[model.TagsID], tagID)
	}
	for tag := range userProfile.Zones2 {
		tagID, _ := strconv.ParseInt(tag, 10, 64)
		where.In[model.TagsID] = append(where.In[model.TagsID], tagID)
	}
	for _, id := range userProfile.DedupVideos {
		where.NotIn[model.CID] = append(where.NotIn[model.CID], id)
	}
	filter := make(map[string]interface{})
	filter["buvid"] = request.BUVID
	filter["mid"] = request.MID
	query := Query{
		Calc:   calc,
		Where:  where,
		Filter: filter,
		From:   0,
		Size:   100,
	}
	queryBody, err := jsoniter.Marshal(query)
	if err != nil {
		return nil, err
	}
	// The retriever name is passed as an argument, never as the format string.
	log.Info("%s query: %s", r.name(), string(queryBody))
	return &searchv1.RecVideoDataRequest{Query: string(queryBody)}, nil
}
// operationRetriever is the Retriever for the Selection strategy.
type operationRetriever struct {
	Retriever
}

// name reports the strategy name, Selection.
func (r *operationRetriever) name() (name string) {
	return Selection
}
// queryRewrite builds the selection search request: videos in state 5 only,
// whose CID is not in userProfile.DedupVideos, filtered by the requester's
// BUVID and MID, first 100 hits. A JSON encoding failure is returned as err
// with a nil request.
func (r *operationRetriever) queryRewrite(c context.Context, request rpc.RecsysRequest, userProfile *model.UserProfile) (req *searchv1.RecVideoDataRequest, err error) {
	calc := &Calc{
		Open:       1,
		PlayRatio:  0.3,
		FavRatio:   0.05,
		LikeRatio:  0.15,
		PubRatio:   0.1,
		ShareRatio: 0.1,
		CoinRatio:  0.1,
		ReplyRatio: 0.2,
	}
	where := &Where{
		In:    make(map[string][]interface{}),
		NotIn: make(map[string][]interface{}),
	}
	where.In[model.State] = append(where.In[model.State], model.State5)
	for _, id := range userProfile.DedupVideos {
		where.NotIn[model.CID] = append(where.NotIn[model.CID], id)
	}
	filter := make(map[string]interface{})
	filter["buvid"] = request.BUVID
	filter["mid"] = request.MID
	query := Query{
		Calc:   calc,
		Where:  where,
		Filter: filter,
		From:   0,
		Size:   100,
	}
	queryBody, err := jsoniter.Marshal(query)
	if err != nil {
		return nil, err
	}
	// The retriever name is passed as an argument, never as the format string.
	log.Info("%s query: %s", r.name(), string(queryBody))
	return &searchv1.RecVideoDataRequest{Query: string(queryBody)}, nil
}
// relevantRetriever is the Retriever for the Relevant strategy.
type relevantRetriever struct {
	Retriever
}

// name reports the strategy name, Relevant.
func (r *relevantRetriever) name() (name string) {
	return Relevant
}
// queryRewrite builds the search request that fetches the user's own
// positive, negative and liked videos (by SVID, state 1, 0, 3, 4 or 5).
// It returns (nil, nil) when the profile has neither positive nor liked
// videos. No Filter is set on this query. A JSON encoding failure is returned
// as err with a nil request.
//
// TODO: take userProfile.SessionPosVideos into account.
func (r *relevantRetriever) queryRewrite(c context.Context, request rpc.RecsysRequest, userProfile *model.UserProfile) (req *searchv1.RecVideoDataRequest, err error) {
	if len(userProfile.PosVideos) == 0 && len(userProfile.LikeVideos) == 0 {
		return nil, nil
	}
	calc := &Calc{
		Open:       1,
		PlayRatio:  0.3,
		FavRatio:   0.05,
		LikeRatio:  0.15,
		PubRatio:   0.1,
		ShareRatio: 0.1,
		CoinRatio:  0.1,
		ReplyRatio: 0.2,
	}
	where := &Where{
		In:    make(map[string][]interface{}),
		NotIn: make(map[string][]interface{}),
	}
	where.In[model.State] = append(where.In[model.State], model.State1, model.State0, model.State3, model.State4, model.State5)
	for id := range userProfile.PosVideos {
		where.In[model.SVID] = append(where.In[model.SVID], id)
	}
	for id := range userProfile.NegVideos {
		where.In[model.SVID] = append(where.In[model.SVID], id)
	}
	for id := range userProfile.LikeVideos {
		where.In[model.SVID] = append(where.In[model.SVID], id)
	}
	query := Query{
		Calc:  calc,
		Where: where,
		From:  0,
		Size:  100,
	}
	queryBody, err := jsoniter.Marshal(query)
	if err != nil {
		return nil, err
	}
	// The retriever name is passed as an argument, never as the format string.
	log.Info("%s query: %s", r.name(), string(queryBody))
	return &searchv1.RecVideoDataRequest{Query: string(queryBody)}, nil
}
// fillRecord copies a search hit into record: Svid is set from the hit and
// record.Map (which must already be non-nil) receives the title, content,
// IDs, hive counters, state, uploader MID and the "|"-joined tag names,
// types and IDs.
func fillRecord(record *rpc.RecsysRecord, videoEsInfo *searchv1.RecVideoInfo) {
	// Double quotes are replaced by spaces so the text is easy to embed in JSON.
	dequote := func(s string) string { return strings.Replace(s, "\"", " ", -1) }
	decimal := func(v int64) string { return strconv.FormatInt(v, 10) }

	record.Svid = videoEsInfo.SVID
	fields := record.Map
	fields[model.Title] = dequote(videoEsInfo.Title)
	fields[model.Content] = dequote(videoEsInfo.Content)
	fields[model.AVID] = decimal(videoEsInfo.AVID)
	fields[model.CID] = decimal(videoEsInfo.CID)
	fields[model.SVID] = decimal(videoEsInfo.SVID)
	fields[model.TID] = decimal(videoEsInfo.Tid)
	fields[model.SubTid] = decimal(videoEsInfo.SubTid)
	fields[model.PlayHive] = decimal(videoEsInfo.PlayHive)
	fields[model.FavHive] = decimal(videoEsInfo.FavHive)
	fields[model.LikesHive] = decimal(videoEsInfo.LikesHive)
	fields[model.CoinHive] = decimal(videoEsInfo.CoinHive)
	fields[model.State] = decimal(videoEsInfo.State)
	fields[model.UperMid] = decimal(videoEsInfo.MID)

	count := len(videoEsInfo.Tags)
	names := make([]string, 0, count)
	kinds := make([]string, 0, count)
	ids := make([]string, 0, count)
	for _, t := range videoEsInfo.Tags {
		names = append(names, t.Name)
		kinds = append(kinds, strconv.Itoa(int(t.Type)))
		ids = append(ids, strconv.Itoa(int(t.ID)))
	}
	fields[model.TagsName] = strings.Join(names, "|")
	fields[model.TagsType] = strings.Join(kinds, "|")
	fields[model.TagsID] = strings.Join(ids, "|")
}

View File

@@ -0,0 +1,525 @@
package retrieve
import (
"context"
"fmt"
recallv1 "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/app/service/bbq/recsys/service/util"
"go-common/library/log"
"strconv"
)
// Recall strategies: names, limits, priorities and scorer identifiers used
// when building RecallInfo entries.
const (
// Recall class: values used for RecallInfo.Name.
HotRecall = "HotRecall"
RandomRecall = "RandomRecall"
SelectionRecall = "SelectionRecall"
UserProfileBili = "UserProfileBili"
UserProfileBBQ = "UserProfileBBQ"
LikeI2IRecall = "LikeI2IRecall"
LikeTagRecall = "LikeTagRecall"
LikeUPRecall = "LikeUPRecall"
PosI2IRecall = "PosI2IRecall"
PosTagRecall = "PosTagRecall"
FollowRecall = "FollowRecall"
// Upper bound compared against the number of merged recall keys.
_MaxRecallTagCount = 50
// Priority: values used for RecallInfo.Priority.
_PriorityVeryHigh = 10000
_PriorityHigh = 1000
_PriorityMid = 100
_PriorityLow = 10
// Recall rank method: value used for RecallInfo.Scorer.
_RandomScorer = "default"
// Top-N caps; _TopNFollow limits how many followed uploaders are recalled.
_TopNLikeVideo = 10
_TopNLikeUp = 10
_TopNLikeTag = 10
_TopNPosVideo = 10
_TopNPosTag = 10
_TopNFollow = 10
_TopNNegTag = 20 // not used as recall tag
)
// RecallKey prefixes.
const (
RecallKeyI2IPrefix = "RECALL:I2I"
RecallKeyTagIDPrefix = "RECALL:HOT_T"
RecallKeyUpIDPrefix = "RECALL:HOT_UP"
)
// Recall redis keys and key templates; templates take an ID through their
// %d or %s verb. _BloomFilter is the value used for RecallInfo.Filter.
const (
RecallOpVideoKey = "job:bbq:rec:op"
RecallHotDefault = "RECALL:HOT_DEFAULT:0"
_recRecallKeyI2I = "RECALL:I2I:%d"
_recRecallKeyUP = "RECALL:HOT_UP:%d"
_recRecallKeyTagIDTemplateString = "RECALL:HOT_T:%s"
_recRecallKeyTagIDTemplateInt = "RECALL:HOT_T:%d"
_recRecallKeyTagIDNewPubTemplateString = "RECALL:T:%s"
_BloomFilter = "bloomfilter"
)
// RecallManager manages multiple retrieve functions backed by the recall
// service. NewRecallManager creates it with an empty Retrievers list.
type RecallManager struct {
// Retrievers are asked for additional RecallInfo entries by V2RetrieveFunc.
Retrievers []Retriever2
}
// Retriever2 is a recall strategy that contributes one RecallInfo to a
// recall request.
type Retriever2 interface {
// name returns the strategy name.
name() (name string)
// queryRewrite builds the RecallInfo for this strategy. V2RetrieveFunc
// ignores the result when err is non-nil or recallInfo is nil.
queryRewrite(c context.Context, request *recsys.RecsysRequest, userProfile *model.UserProfile) (recallInfo *recallv1.RecallInfo, err error)
}
// RetrieverFuncV2 is the signature of a recall-service based retrieve
// function; RecallManager.V2RetrieveFunc has this shape.
type RetrieverFuncV2 func(c context.Context, request *recsys.RecsysRequest, userProfile *model.UserProfile, recallClient recallv1.RecsysRecallClient) (response *recsys.RecsysResponse, err error)
// NewRecallManager returns a RecallManager whose Retrievers list is empty
// but non-nil.
func NewRecallManager() (m *RecallManager) {
	return &RecallManager{Retrievers: make([]Retriever2, 0)}
}
//V2RetrieveFunc is default retrieve function
func (m *RecallManager) V2RetrieveFunc(c context.Context, request *recsys.RecsysRequest, userProfile *model.UserProfile, recallClient recallv1.RecsysRecallClient) (response *recsys.RecsysResponse, err error) {
recallInfos := make([]*recallv1.RecallInfo, 0)
for _, retriever := range m.Retrievers {
recallInfo, err := retriever.queryRewrite(c, request, userProfile)
if err == nil && recallInfo != nil {
recallInfos = append(recallInfos, recallInfo)
}
}
// selection 100
selectionRecallInfo := &recallv1.RecallInfo{
Name: SelectionRecall,
Tag: RecallOpVideoKey,
Limit: 100,
Filter: _BloomFilter,
Priority: _PriorityHigh,
Scorer: _RandomScorer,
}
recallInfos = append(recallInfos, selectionRecallInfo)
// 热门
hotRecallInfo := &recallv1.RecallInfo{
Name: HotRecall,
Tag: RecallHotDefault,
Limit: 200,
Filter: _BloomFilter,
Priority: _PriorityHigh,
Scorer: _RandomScorer,
}
recallInfos = append(recallInfos, hotRecallInfo)
// tags
likeVideoMap, likeUPMap, likeTagIDMap, posVideoMap, posTagIDMap := buildSessionFeature(c, userProfile, recallClient)
if request.DebugFlag {
log.Info("posVideoMap = %v", posVideoMap)
}
// bbq实时行为 bbq like i2i
for ID := range likeVideoMap {
recallInfo := &recallv1.RecallInfo{
Name: LikeI2IRecall,
Tag: fmt.Sprintf(_recRecallKeyI2I, ID),
Limit: 40,
Filter: _BloomFilter,
Priority: _PriorityVeryHigh,
}
recallInfos = append(recallInfos, recallInfo)
}
// bbq实时行为 bbq like up
for UpMID := range likeUPMap {
recallInfo := &recallv1.RecallInfo{
Name: LikeUPRecall,
Tag: fmt.Sprintf(_recRecallKeyUP, UpMID),
Limit: 40,
Filter: _BloomFilter,
Priority: _PriorityVeryHigh,
}
recallInfos = append(recallInfos, recallInfo)
}
// bbq实时行为 bbq follow
// x * 5
var followMap map[int64]int64
if len(userProfile.BBQFollowAction) > _TopNFollow {
followUps := util.SortMapByValue(userProfile.BBQFollowAction)[0:_TopNFollow]
followMap = make(map[int64]int64, _TopNFollow)
for _, pair := range followUps {
followMap[pair.Key] = pair.Value
}
} else {
followMap = userProfile.BBQFollowAction
}
for upMID := range followMap {
tagRecallInfo := &recallv1.RecallInfo{
Name: FollowRecall,
Tag: fmt.Sprintf(_recRecallKeyUP, upMID),
Limit: 40,
Filter: _BloomFilter,
Priority: _PriorityHigh,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
for tag := range likeTagIDMap {
tagRecallInfo := &recallv1.RecallInfo{
Name: LikeTagRecall,
Tag: fmt.Sprintf(_recRecallKeyTagIDTemplateInt, tag),
Limit: 20,
Filter: _BloomFilter,
Priority: _PriorityHigh,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
for tag := range posTagIDMap {
tagRecallInfo := &recallv1.RecallInfo{
Name: PosTagRecall,
Tag: fmt.Sprintf(_recRecallKeyTagIDTemplateInt, tag),
Limit: 30,
Filter: _BloomFilter,
Priority: _PriorityHigh,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
//for tag := range userProfile.PosTags {
// tagRecallInfo := &recallv1.RecallInfo{
// Name: LikeTagRecall,
// Tag: fmt.Sprintf(_recRecallKeyTemplateTagID, tag),
// Limit: 10,
// Filter: _BloomFilter,
// Priority: _PriorityHigh,
// }
// recallInfos = append(recallInfos, tagRecallInfo)
//}
// bbq user profile: zone
// 20 * 5 = 100
for tag := range userProfile.BBQZones {
zoneRecallInfo := &recallv1.RecallInfo{
Name: UserProfileBBQ,
Tag: fmt.Sprintf(_recRecallKeyTagIDTemplateString, tag),
Limit: 10,
Filter: _BloomFilter,
Priority: _PriorityMid,
}
recallInfos = append(recallInfos, zoneRecallInfo)
}
// bbq user profile: tag
// 30 * 5 = 150
for tag := range userProfile.BBQTags {
tagRecallInfo := &recallv1.RecallInfo{
Name: UserProfileBBQ,
Tag: fmt.Sprintf(_recRecallKeyTagIDTemplateString, tag),
Limit: 20,
Filter: _BloomFilter,
Priority: _PriorityMid,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
// bbq user profile: follow
// x * 5
for upMID := range userProfile.BBQPrefUps {
tagRecallInfo := &recallv1.RecallInfo{
Name: UserProfileBBQ,
Tag: fmt.Sprintf(_recRecallKeyUP, upMID),
Limit: 10,
Filter: _BloomFilter,
Priority: _PriorityMid,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
// bili user profile: zone2
// 20 * 5 = 100
for tag := range userProfile.Zones2 {
zoneRecallInfo := &recallv1.RecallInfo{
Name: UserProfileBili,
Tag: fmt.Sprintf(_recRecallKeyTagIDTemplateString, tag),
Limit: 10,
Filter: _BloomFilter,
Priority: _PriorityMid,
}
recallInfos = append(recallInfos, zoneRecallInfo)
}
// bili user profile: tag
// 30 * 5 = 150
tagRecallSize := 20
if len(userProfile.BBQTags) > 0 {
tagRecallSize = 10
}
for tag := range userProfile.BiliTags {
tagRecallInfo := &recallv1.RecallInfo{
Name: UserProfileBili,
Tag: fmt.Sprintf(_recRecallKeyTagIDTemplateString, tag),
Limit: int32(tagRecallSize),
Filter: _BloomFilter,
Priority: _PriorityMid,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
// 召回标签较大的情况下,减少热门召回
if len(recallInfos) >= 5 {
for _, recallInfo := range recallInfos {
if recallInfo.Name == HotRecall {
recallInfo.Limit = 50
}
}
}
//recallTagNameMap := make(map[string][]string)
//recallTagInfoMap := make(map[string]*recallv1.RecallInfo)
//recallTagPriorityMap := make(map[string]int32)
//
//for _, recallInfo := range recallInfos {
// names := recallTagNameMap[recallInfo.Tag]
// names = append(names, recallInfo.Name)
// recallTagNameMap[recallInfo.Tag] = names
// recallTagInfoMap[recallInfo.Tag] = recallInfo
//
// if priority, ok := recallTagPriorityMap[recallInfo.Tag]; ok {
// if recallInfo.Priority > priority {
// recallTagPriorityMap[recallInfo.Tag] = priority
// }
// } else {
// recallTagPriorityMap[recallInfo.Tag] = priority
// }
//}
//
//newRecallInfos := make([]*recallv1.RecallInfo, 0)
//for tag, names := range recallTagNameMap {
// recallInfo := recallTagInfoMap[tag]
// recallInfo.Name = strings.Join(names, "|")
// newRecallInfos = append(newRecallInfos, recallInfo)
//}
recallInfos = mergeRecallKey(recallInfos)
recallKeyCount := len(recallInfos)
// 老用户增加随机召回或者N刷之后??? TODO
if len(userProfile.LastRecords) >= 20 && recallKeyCount < _MaxRecallTagCount && len(userProfile.BBQTags) > 0 && len(userProfile.BiliTags) > 0 {
tagCountMap := make(map[string]int)
for tag := range userProfile.BBQTags {
count := tagCountMap[tag]
tagCountMap[tag] = count + 1
}
for tag := range userProfile.BiliTags {
count := tagCountMap[tag]
tagCountMap[tag] = count + 1
}
randomTagCount := 0
for tag := range tagCountMap {
if randomTagCount > 10 {
break
}
randomTagCount++
tagRecallInfo := &recallv1.RecallInfo{
Name: RandomRecall,
Tag: fmt.Sprintf(_recRecallKeyTagIDNewPubTemplateString, tag),
Limit: 10,
Filter: _BloomFilter,
Priority: _PriorityLow,
Scorer: _RandomScorer,
}
recallInfos = append(recallInfos, tagRecallInfo)
}
}
//merge random recall
recallInfos = mergeRecallKey(recallInfos)
recallRequest := &recallv1.RecallRequest{
MID: request.MID,
BUVID: request.BUVID,
TotalLimit: 500,
Infos: recallInfos,
}
log.Info("recall key count is (%v), recall request: (%v)", len(recallInfos), recallRequest)
response = new(recsys.RecsysResponse)
response.Message = make(map[string]string)
response.Message[model.QueryID] = request.QueryID
recallResponse, err := recallClient.Recall(c, recallRequest)
if err != nil || recallResponse == nil {
log.Error("recall service error (%v) or recall response is null, traceID is %v", err, request.TraceID)
return
}
if len(recallResponse.List) < int(request.Limit) {
response.Message[model.ResponseDownGrade] = "1"
log.Error("response size less then (%v), is (%v), , traceID is (%v)", request.Limit, recallResponse.Total, request.TraceID)
recallResponse, err = downGradeRecall(c, recallRequest, recallClient)
if err != nil || recallResponse == nil {
log.Error("down grade recall service error (%v) or recall response is null, traceID is (%v)", err, request.TraceID)
return
}
}
response.Message[model.ResponseRecallCount] = strconv.Itoa(len(recallResponse.List))
err = transform(recallResponse, response)
deleteBlack(response, userProfile)
return
}
// downGradeRecall re-issues a recall request in degraded mode: every
// RecallInfo in the request gets its Filter cleared and its Scorer switched
// to _RandomScorer before the recall service is called again.
//
// The request is modified in place, so the caller's recallRequest keeps the
// degraded settings after this function returns. The response and error of
// the second Recall call are returned unchanged.
func downGradeRecall(c context.Context, recallRequest *recallv1.RecallRequest, recallClient recallv1.RecsysRecallClient) (recallResponse *recallv1.RecallResponse, err error) {
	infos := recallRequest.Infos
	for i := range infos {
		infos[i].Filter = ""
		infos[i].Scorer = _RandomScorer
	}
	log.Info("down grade recall request: (%v)", recallRequest)
	return recallClient.Recall(c, recallRequest)
}
// buildSessionFeature derives session-level signals from the videos the user
// recently liked, reacted positively to, or reacted negatively to.
//
// It looks the videos up through recallClient.VideoIndex, then:
//   - counts the tags of liked videos into userProfile.LikeTagIDs and records
//     the uploader of each liked video in userProfile.LikeUPs (value taken
//     from userProfile.LikeVideos),
//   - counts the tags of positive videos into userProfile.PosTagIDs,
//   - counts the tags of negative videos into userProfile.NegTagIDs.
//
// Each returned map is the corresponding profile map truncated to its _TopN*
// limit (largest values first, as ordered by util.SortMapByValue); when the
// profile map is already within the limit the profile map itself is returned,
// not a copy. userProfile.PosTagIDs and userProfile.NegTagIDs are also
// replaced by their truncated versions.
//
// All results are nil when the user has no like/pos/neg videos or when the
// VideoIndex call fails (the failure is only logged).
func buildSessionFeature(c context.Context, userProfile *model.UserProfile, recallClient recallv1.RecsysRecallClient) (likeVideoMap map[int64]int64, likeUPMap map[int64]int64, likeTagIDMap map[int64]int64, posVideoMap map[int64]int64, posTagIDMap map[int64]int64) {
	if len(userProfile.LikeVideos) == 0 && len(userProfile.PosVideos) == 0 && len(userProfile.NegVideos) == 0 {
		return
	}

	// Collect every video id we need forward-index data for.
	svids := make([]int64, 0, len(userProfile.LikeVideos)+len(userProfile.PosVideos)+len(userProfile.NegVideos))
	for svid := range userProfile.LikeVideos {
		svids = append(svids, svid)
	}
	for svid := range userProfile.PosVideos {
		svids = append(svids, svid)
	}
	for svid := range userProfile.NegVideos {
		svids = append(svids, svid)
	}

	videoIndexResponse, err := recallClient.VideoIndex(c, &recallv1.VideoIndexRequest{SVIDs: svids})
	if err != nil || videoIndexResponse == nil {
		log.Error("recall service VideoIndex error (%v) or recall response is null", err)
		return
	}

	// Liked videos: accumulate tag counts and remember the uploader.
	for _, item := range videoIndexResponse.List {
		likedValue, liked := userProfile.LikeVideos[int64(item.SVID)]
		if !liked {
			continue
		}
		for _, tag := range item.BasicInfo.Tags {
			userProfile.LikeTagIDs[int64(tag.TagID)]++
		}
		userProfile.LikeUPs[int64(item.BasicInfo.MID)] = likedValue
	}

	// Positive-feedback videos: accumulate tag counts.
	for _, item := range videoIndexResponse.List {
		if _, positive := userProfile.PosVideos[int64(item.SVID)]; !positive {
			continue
		}
		for _, tag := range item.BasicInfo.Tags {
			userProfile.PosTagIDs[int64(tag.TagID)]++
		}
	}

	// Negative-feedback videos: accumulate tag counts.
	// NOTE(review): this loop keys on BasicInfo.SVID while the two loops above
	// key on the top-level SVID; presumably both carry the same id - confirm.
	for _, item := range videoIndexResponse.List {
		if _, negative := userProfile.NegVideos[int64(item.BasicInfo.SVID)]; !negative {
			continue
		}
		for _, tag := range item.BasicInfo.Tags {
			userProfile.NegTagIDs[int64(tag.TagID)]++
		}
	}

	// topN returns src itself when it already fits in n entries, otherwise a
	// new map holding the n entries with the largest values.
	topN := func(src map[int64]int64, n int) map[int64]int64 {
		if len(src) <= n {
			return src
		}
		top := make(map[int64]int64, n)
		for _, pair := range util.SortMapByValue(src)[:n] {
			top[pair.Key] = pair.Value
		}
		return top
	}

	likeVideoMap = topN(userProfile.LikeVideos, _TopNLikeVideo)
	likeUPMap = topN(userProfile.LikeUPs, _TopNLikeUp)
	likeTagIDMap = topN(userProfile.LikeTagIDs, _TopNLikeTag)
	posVideoMap = topN(userProfile.PosVideos, _TopNPosVideo)

	// TODO: positive/negative feedback tags should take both the occurrence
	// count and the time of the event into account.
	posTagIDMap = topN(userProfile.PosTagIDs, _TopNPosTag)
	userProfile.PosTagIDs = posTagIDMap
	userProfile.NegTagIDs = topN(userProfile.NegTagIDs, _TopNNegTag)
	return
}

View File

@@ -0,0 +1,107 @@
package retrieve
import (
"context"
"fmt"
"go-common/app/service/main/relation/api"
"go-common/library/log"
recallv1 "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
)
// BiliFollowsRecall is the recall name set on the RecallInfo entries that
// UpsRec builds for uploaders the user follows on bilibili but not on BBQ.
const BiliFollowsRecall = "BiliFollowsRecall"
// UpsRec builds and executes the recall request used for uploader
// recommendation.
//
// The request contains, in this order:
//   - one BiliFollowsRecall key (limit 50, very high priority, no filter) per
//     uploader returned by getFollowings that is absent from
//     userProfile.BBQFollow, capped at 48 uploaders;
//   - the selection recall (limit 100, random scorer);
//   - the hot recall (limit 200, random scorer).
//
// The returned response is never nil. When the recall call fails or yields a
// nil response the empty response is returned together with the recall error
// (which may itself be nil). When the recall response has a nil List the
// response carries an empty List. Otherwise the recall response is converted
// with transform and transform's error is returned.
func (m *RecallManager) UpsRec(c context.Context, request *recsys.RecsysRequest, userProfile *model.UserProfile, recallClient recallv1.RecsysRecallClient, relationClient api.RelationClient) (response *recsys.RecsysResponse, err error) {
	const maxBiliFollowKeys = 48

	var recallInfos []*recallv1.RecallInfo

	// bbq user profile: bili followings not yet followed on bbq, highest priority
	// x * 50
	biliFollowings := getFollowings(c, relationClient, userProfile.Mid)
	bbqFollowings := userProfile.BBQFollow
	added := 0
	for _, upMID := range biliFollowings {
		if added >= maxBiliFollowKeys {
			break
		}
		if _, followed := bbqFollowings[upMID]; followed {
			continue
		}
		added++
		recallInfos = append(recallInfos, &recallv1.RecallInfo{
			Name:     BiliFollowsRecall,
			Tag:      fmt.Sprintf(_recRecallKeyUP, upMID),
			Limit:    50,
			Filter:   "",
			Priority: _PriorityVeryHigh,
		})
	}

	recallInfos = append(recallInfos,
		// selection 100
		&recallv1.RecallInfo{
			Name:     SelectionRecall,
			Tag:      RecallOpVideoKey,
			Limit:    100,
			Filter:   "",
			Priority: _PriorityHigh,
			Scorer:   _RandomScorer,
		},
		// hot
		&recallv1.RecallInfo{
			Name:     HotRecall,
			Tag:      RecallHotDefault,
			Limit:    200,
			Filter:   "",
			Priority: _PriorityHigh,
			Scorer:   _RandomScorer,
		},
	)

	// recall request
	recallRequest := &recallv1.RecallRequest{
		MID:        request.MID,
		BUVID:      request.BUVID,
		TotalLimit: 500,
		Infos:      recallInfos,
	}
	log.Info("recall key count is (%v), recall request: (%v)", len(recallInfos), recallRequest)

	response = &recsys.RecsysResponse{Message: make(map[string]string)}

	// do real request action
	// todo down grade recall
	// todo limit result from bili followups to 100
	recallResponse, err := recallClient.Recall(c, recallRequest)
	switch {
	case err != nil || recallResponse == nil:
		log.Error("recall service error (%v) or recall response is null, traceID is %v", err, request.TraceID)
		return
	case recallResponse.List == nil:
		log.Warn("recall service did not return any result")
		response.List = make([]*recsys.RecsysRecord, 0)
		return
	}

	err = transform(recallResponse, response)
	return
}
// getFollowings asks the relation service for the accounts that mid follows
// and returns their mids.
//
// The lookup is best effort: when the relation call fails the error is logged
// and an empty (non-nil) slice is returned, so callers can always range over
// the result.
func getFollowings(c context.Context, relationClient api.RelationClient, mid int64) (followings []int64) {
	followings = make([]int64, 0)
	followingsReply, err := relationClient.Followings(c, &api.MidReq{Mid: mid})
	if err != nil {
		// The original code only read the reply inside the err != nil branch,
		// which dropped every successful reply and dereferenced the reply of a
		// failed call.
		log.Error("relation service Followings error (%v), mid is (%v)", err, mid)
		return
	}
	if followingsReply == nil {
		return
	}
	for _, following := range followingsReply.FollowingList {
		followings = append(followings, following.Mid)
	}
	return
}

View File

@@ -0,0 +1,56 @@
package service
import (
"context"
"go-common/app/service/bbq/recsys/conf"
"go-common/app/service/bbq/recsys/dao"
postProcess "go-common/app/service/bbq/recsys/service/postprocess"
"go-common/app/service/bbq/recsys/service/rank"
"go-common/app/service/bbq/recsys/service/retrieve"
"go-common/library/log/infoc"
"go-common/library/stat/prom"
)
// Service holds the configuration, the data-access layer and the stage
// managers used by the recsys request handlers. It is built by New.
type Service struct {
// c is the configuration the service was created with.
c *conf.Config
// dao is the data-access layer; Ping and Close delegate to it.
dao *dao.Dao
// infoc is the info collector created from c.Infoc.
infoc *infoc.Infoc
// retrieverManager is created by retrieve.NewRetrieverManager.
retrieverManager *retrieve.RetrieverManager
// recallManager is created by retrieve.NewRecallManager.
recallManager *retrieve.RecallManager
// rankManager is created by NewRankManager.
rankManager *RankManager
// rankModelManager is created by rank.NewRankModelManager.
rankModelManager *rank.RankModelManager
// filterManager is created by NewFilterManager.
filterManager *FilterManager
// postProcessor is created by postProcess.NewPostProcessor.
postProcessor *postProcess.PostProcessor
// monitoring: set to prom.BusinessInfoCount by New.
businessInfoCount *prom.Prom
}
// New creates a Service from the given configuration. It instantiates the
// data-access layer, the info collector and one manager per pipeline stage,
// in the order the fields are listed below.
func New(c *conf.Config) (s *Service) {
	s = new(Service)
	s.c = c
	s.dao = dao.New(c)
	s.infoc = infoc.New(c.Infoc)
	s.retrieverManager = retrieve.NewRetrieverManager()
	s.recallManager = retrieve.NewRecallManager()
	s.rankManager = NewRankManager()
	s.rankModelManager = rank.NewRankModelManager()
	s.filterManager = NewFilterManager()
	s.postProcessor = postProcess.NewPostProcessor()
	s.businessInfoCount = prom.BusinessInfoCount
	return
}
// Ping checks the service by delegating to the data-access layer's Ping and
// returns its error unchanged.
func (s *Service) Ping(c context.Context) (err error) {
return s.dao.Ping(c)
}
// Close shuts the service down by closing the data-access layer.
func (s *Service) Close() {
s.dao.Close()
}

View File

@@ -0,0 +1,49 @@
package service
import (
"fmt"
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/app/service/bbq/recsys/model"
"go-common/library/log"
"time"
"github.com/json-iterator/go"
)
// StoreLog records one request/response pair.
//
// It first reports the full request and response to the info collector,
// together with an "ext" JSON object carrying the query id, trace id, rank
// model name and down-grade flag. It then replaces response.List with a
// reduced copy (only Svid, Score and a fixed set of Map keys are kept) and
// writes the reduced response to the application log.
//
// Note that the reduction modifies the caller's response in place. The user
// parameter is currently unused.
func (s *Service) StoreLog(request *recsys.RecsysRequest, response *recsys.RecsysResponse, user *model.UserProfile, business string) {
	// Marshal errors are deliberately ignored: logging is best effort and an
	// empty payload is preferable to failing the request.
	reqString, _ := jsoniter.MarshalToString(request)
	responseBytes, _ := jsoniter.Marshal(response)

	ext := make(map[string]string)
	ext["queryid"] = request.QueryID
	ext["traceid"] = request.TraceID
	ext["rankmodel"] = response.Message[model.RankModelName]
	ext[model.ResponseDownGrade] = response.Message[model.ResponseDownGrade]
	extString, _ := jsoniter.MarshalToString(ext)

	// Take the timestamp once so both log sinks agree on it.
	now := time.Now().Unix()
	s.infoc.Info(request.MID, request.BUVID, now, reqString, string(responseBytes), request.Abtest, business, extString)

	// 5.4 reduce record keys
	keptKeys := []string{
		model.RecallClasses,
		model.RecallTags,
		model.AVID,
		model.CID,
		model.State,
		model.ScatterTag,
		model.UperMid,
		model.Title,
	}
	records := make([]*recsys.RecsysRecord, 0, len(response.List))
	for _, record := range response.List {
		reduced := &recsys.RecsysRecord{
			Svid:  record.Svid,
			Score: record.Score,
			Map:   make(map[string]string, len(keptKeys)),
		}
		for _, key := range keptKeys {
			reduced.Map[key] = record.Map[key]
		}
		records = append(records, reduced)
	}
	response.List = records
	responseStr, _ := jsoniter.MarshalToString(response)

	// The message embeds JSON payloads that may contain '%'. Pass it as an
	// argument to a constant "%s" format instead of using it as the format
	// string, otherwise the logger would re-interpret those characters.
	log.Info("%s", fmt.Sprintf("response log: mid:%v, buvid:%v, %v, %v, %v, %v, %v, %v", request.MID, request.BUVID, now, reqString, responseStr, request.Abtest, business, extString))
}

View File

@@ -0,0 +1,90 @@
package service
import (
"context"
"fmt"
"github.com/json-iterator/go"
rpc "go-common/app/service/bbq/recsys/api/grpc/v1"
"go-common/library/log"
"go-common/library/net/trace"
)
// UpsRecService handles an uploader-recommendation request.
//
// Pipeline: load the user profile (plus follow and black lists for logged-in
// users), recall candidates, filter, rank, post-process, cut the list to
// req.Limit, then store the page and write the request/response log.
//
// Special cases:
//   - an empty result returns a fresh response whose Message["info"] explains
//     that nothing was found;
//   - when req.DebugFlag is set the full (unpaged) list is returned with the
//     serialized user profile in Message["UserInfo"], and nothing is stored.
//
// The returned error is the one produced by the post-processing step; profile
// and recall failures are logged and the pipeline continues on a best-effort
// basis.
func (s *Service) UpsRecService(c context.Context, req *rpc.RecsysRequest) (response *rpc.RecsysResponse, err error) {
	// request log; a marshal failure only costs us the log line
	if data, marshalErr := jsoniter.Marshal(req); marshalErr == nil {
		log.Info("upsrec request is %s:", data)
	}

	// 0.0 pre process: ab test
	// 0.0 pre process
	tracer, _ := trace.FromContext(c)
	req.TraceID = fmt.Sprintf("%s", tracer)

	// 1.0 get user profile
	userProfile, err := s.dao.LoadUserProfile(c, req.MID, req.BUVID)
	if err != nil {
		// Previously this error was overwritten without ever being looked at.
		log.Error("load user profile error (%v), MID is (%v), BUVID is (%v), traceID is (%v)", err, req.MID, req.BUVID, req.TraceID)
	}
	if req.MID != 0 {
		if err = s.dao.GetUserFollow(c, req.MID, userProfile); err != nil {
			log.Errorv(c, log.KV("userLog", "query user follow fail"), log.KV("MID", req.MID))
			err = nil
		}
		if err = s.dao.GetUserBlack(c, req.MID, userProfile); err != nil {
			log.Errorv(c, log.KV("userLog", "query user black fail"), log.KV("MID", req.MID))
			err = nil
		}
	}

	// 2.0 retrieve
	response, err = s.recallManager.UpsRec(c, req, userProfile, s.dao.RecallClient, s.dao.RelationClient)
	if err != nil {
		// Keep going with whatever was recalled; the empty-response branch
		// below covers the case where nothing came back.
		log.Error("ups recall error (%v), traceID is (%v)", err, req.TraceID)
	}

	// 3.0 filter
	s.filterManager.upsFilter(req, response, userProfile)

	// 4.0 ranker
	// 4.0.0
	// 4.0.1 prepare feature
	// 4.0.2 do rank
	// todo rank model
	s.rankManager.rank(c, req, response, userProfile, s.dao)

	// 5.0 post process, apply rule, page, store results
	err = s.postProcessor.ProcessUpsRec(c, req, response, userProfile)
	size := len(response.List)
	if size == 0 {
		log.Error("Ups response is empty!")
		response = &rpc.RecsysResponse{
			Message: make(map[string]string),
			List:    make([]*rpc.RecsysRecord, 0),
		}
		response.Message["info"] = "Ups response is empty!"
		return
	}

	// debug log
	if req.DebugFlag {
		data, _ := jsoniter.Marshal(userProfile)
		response.Message["UserInfo"] = string(data)
		return
	}

	// 5.2 page
	limit := int(req.Limit)
	if limit > size {
		limit = size
	}
	response.List = response.List[0:limit]

	// 5.3 store results
	s.dao.StoreRecResults(c, userProfile, req.MID, req.BUVID, response, s.dao.LastUpsPageRedisKey, userProfile.LastUpsRecords)
	s.StoreLog(req, response, userProfile, "upsrec")
	return
}

View File

@@ -0,0 +1,29 @@
# Bazel build file for the recsys util package. The targets below carry the
# "automanaged" tag.
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library built from util.go; depends on the recsys gRPC v1 API package.
go_library(
name = "go_default_library",
srcs = ["util.go"],
importpath = "go-common/app/service/bbq/recsys/service/util",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//app/service/bbq/recsys/api/grpc/v1:go_default_library"],
)
# Every file in this package (private to the package).
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public aggregate of this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,104 @@
package util
import "sort"
import (
recsys "go-common/app/service/bbq/recsys/api/grpc/v1"
"math"
)
// Pair holds one key/value entry of a map[int64]int64.
type Pair struct {
	Key   int64
	Value int64
}

// PairList is a slice of Pair that implements sort.Interface. It orders
// entries by Value in descending order; entries with equal Value are ordered
// by ascending Key so that the result is deterministic.
type PairList []Pair

// Swap exchanges the entries at positions i and j.
func (p PairList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Len returns the number of entries.
func (p PairList) Len() int { return len(p) }

// Less reports whether entry i must sort before entry j: larger Value first,
// smaller Key first on equal Value.
func (p PairList) Less(i, j int) bool {
	if p[i].Value != p[j].Value {
		return p[i].Value > p[j].Value
	}
	// Without this tie-break the order of equal values would depend on the
	// random map iteration order and on sort.Sort, which is not stable.
	return p[i].Key < p[j].Key
}

// SortMapByValue converts m into a PairList sorted by descending Value (ties
// broken by ascending Key). A nil or empty map yields an empty, non-nil list.
func SortMapByValue(m map[int64]int64) PairList {
	p := make(PairList, 0, len(m))
	for k, v := range m {
		p = append(p, Pair{Key: k, Value: v})
	}
	sort.Sort(p)
	return p
}
// PairStr holds one key/value entry of a map[string]string.
type PairStr struct {
	Key   string
	Value string
}

// PairStrList is a slice of PairStr that implements sort.Interface. It orders
// entries by Value in descending (byte-wise string) order; entries with equal
// Value are ordered by ascending Key so that the result is deterministic.
type PairStrList []PairStr

// Swap exchanges the entries at positions i and j.
func (p PairStrList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Len returns the number of entries.
func (p PairStrList) Len() int { return len(p) }

// Less reports whether entry i must sort before entry j: larger Value first,
// smaller Key first on equal Value.
func (p PairStrList) Less(i, j int) bool {
	if p[i].Value != p[j].Value {
		return p[i].Value > p[j].Value
	}
	// Without this tie-break the order of equal values would depend on the
	// random map iteration order and on sort.Sort, which is not stable.
	return p[i].Key < p[j].Key
}

// SortStrMapByValue converts m into a PairStrList sorted by descending Value
// (ties broken by ascending Key). A nil or empty map yields an empty, non-nil
// list.
func SortStrMapByValue(m map[string]string) PairStrList {
	p := make(PairStrList, 0, len(m))
	for k, v := range m {
		p = append(p, PairStr{Key: k, Value: v})
	}
	sort.Sort(p)
	return p
}
// PairStrInt holds one key/value entry of a map[string]int.
type PairStrInt struct {
	Key   string
	Value int
}

// PairStrIntList is a slice of PairStrInt that implements sort.Interface. It
// orders entries by Value in descending order; entries with equal Value are
// ordered by ascending Key so that the result is deterministic.
type PairStrIntList []PairStrInt

// Swap exchanges the entries at positions i and j.
func (p PairStrIntList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Len returns the number of entries.
func (p PairStrIntList) Len() int { return len(p) }

// Less reports whether entry i must sort before entry j: larger Value first,
// smaller Key first on equal Value.
func (p PairStrIntList) Less(i, j int) bool {
	if p[i].Value != p[j].Value {
		return p[i].Value > p[j].Value
	}
	// Without this tie-break the order of equal values would depend on the
	// random map iteration order and on sort.Sort, which is not stable.
	return p[i].Key < p[j].Key
}

// SortStrIntMapByValue converts m into a PairStrIntList sorted by descending
// Value (ties broken by ascending Key). A nil or empty map yields an empty,
// non-nil list.
func SortStrIntMapByValue(m map[string]int) PairStrIntList {
	p := make(PairStrIntList, 0, len(m))
	for k, v := range m {
		p = append(p, PairStrInt{Key: k, Value: v})
	}
	sort.Sort(p)
	return p
}
// Records is a list of recommendation records that implements
// sort.Interface, ordering the records by ascending Score.
type Records []*recsys.RecsysRecord

// Len returns the number of records.
func (rs Records) Len() int {
	return len(rs)
}

// Swap exchanges the records at positions i and j.
func (rs Records) Swap(i, j int) {
	rs[j], rs[i] = rs[i], rs[j]
}

// Less reports whether record i has a lower Score than record j.
func (rs Records) Less(i, j int) bool {
	left, right := rs[i], rs[j]
	return left.Score < right.Score
}
// ScoreCount maps an occurrence count to a score. The count is capped at 10
// and the result is (1 + 0.1*count) / 2, so a count of 0 scores 0.5 and any
// count of 10 or more scores 1.
func ScoreCount(count float64) (score float64) {
	const ceiling = 10.0
	capped := math.Min(count, ceiling)
	numerator := 1 + 0.1*capped
	denominator := 1 + 0.1*ceiling
	return numerator / denominator
}
// ScoreTimeDiff maps an elapsed time to a decaying score:
// 1 - 0.56 * (timeDiff/3600)^0.06. A timeDiff of 0 scores 1.
//
// The division by 3600 suggests timeDiff is in seconds - TODO confirm with
// callers.
//
// A negative timeDiff is treated as 0 (score 1): raising a negative base to
// a fractional power would otherwise make math.Pow return NaN.
func ScoreTimeDiff(timeDiff float64) (score float64) {
	if timeDiff < 0 {
		timeDiff = 0
	}
	score = 1 - 0.56*math.Pow(timeDiff/3600, 0.06)
	return
}