go-common/app/service/live/recommend/internal/dao/dao.go
2019-04-22 18:49:16 +08:00

466 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package dao
import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/pkg/errors"
"go-common/app/service/live/recommend/internal/conf"
"go-common/app/service/live/recommend/recconst"
relation_api "go-common/app/service/live/relation/api/liverpc"
room_api "go-common/app/service/live/room/api/liverpc"
"go-common/library/cache/redis"
"go-common/library/log"
"go-common/library/net/rpc/liverpc"
)
var _userRecCandidateKey = "rec_candidate_%d"
var _recommendOffsetKey = "rec_offset_%d"
// 已经推荐过的池子,用户+日期
var _recommendedKey = "recommended_%d_%s"
// RoomAPI room liverpc client
var RoomAPI *room_api.Client
// RelationAPI relation liverpc client
var RelationAPI *relation_api.Client
// Dao dao
type Dao struct {
c *conf.Config
redis *redis.Pool
}
func init() {
RoomAPI = room_api.New(getConf("room"))
RelationAPI = relation_api.New(getConf("relation"))
}
func getConf(appName string) *liverpc.ClientConfig {
c := conf.Conf.LiveRpc
if c != nil {
return c[appName]
}
return nil
}
// ClearRecommend 清空该用户相关的推荐缓存
func (d *Dao) ClearRecommend(ctx context.Context, uid int64) error {
candidateKey := fmt.Sprintf(_userRecCandidateKey, uid)
recommendedKey := fmt.Sprintf(_recommendedKey, uid, time.Now().Format("20060102"))
offsetKey := fmt.Sprintf(_recommendOffsetKey, uid)
conn := d.redis.Get(ctx)
defer conn.Close()
_, err := conn.Do("DEL", candidateKey, recommendedKey, offsetKey)
return errors.WithStack(err)
}
// New init mysql db
func New(c *conf.Config) (dao *Dao) {
dao = &Dao{
c: c,
redis: redis.NewPool(c.Redis),
}
return
}
// Close close the resource.
func (d *Dao) Close() {
d.redis.Close()
}
func (d *Dao) saveOffset(conn redis.Conn, uid int64, offset int) {
conn.Do("SETEX", fmt.Sprintf(_recommendOffsetKey, uid), 86400, offset)
}
func (d *Dao) addToRecommended(conn redis.Conn, uid int64, ids []int64) {
if len(ids) == 0 {
return
}
day := time.Now().Format("20060102")
key := fmt.Sprintf(_recommendedKey, uid, day)
var is []interface{}
is = append(is, key)
for _, id := range ids {
is = append(is, id)
}
conn.Send("EXPIRE", key, 86400)
conn.Send("SADD", is...)
conn.Flush()
conn.Receive()
_, err := conn.Receive()
if err != nil {
log.Info("addToRecommended error +%v", err)
}
}
// GetRandomRoomIds 随机获取count个推荐
// 如果总数量total比count小则返回total个
func (d *Dao) GetRandomRoomIds(ctx context.Context, uid int64, reqCount int, existRoomIDs []int64) (ret []int64, err error) {
if reqCount == 0 {
return
}
var (
candidateLen int
)
r := d.redis.Get(ctx)
defer r.Close()
candidateKey := fmt.Sprintf(_userRecCandidateKey, uid)
exists, err := redis.Int(r.Do("exists", candidateKey))
if err != nil {
err = errors.WithStack(err)
return
}
existMap := map[int64]struct{}{}
for _, id := range existRoomIDs {
existMap[id] = struct{}{}
}
if exists == 0 {
var candidate []int64
var currentOffset = 0
candidate, err = d.generateLrCandidateList(r, uid, candidateKey)
if err != nil {
return
}
Loop:
for len(ret) < reqCount && currentOffset < len(candidate) {
var tmp []int64
if len(candidate)-currentOffset < int(reqCount) {
tmp = candidate[currentOffset:]
} else {
tmp = candidate[currentOffset : currentOffset+reqCount]
}
//去重
for _, id := range tmp {
_, ok := existMap[id]
currentOffset += 1
if !ok {
ret = append(ret, id)
if len(ret) >= int(reqCount) {
break Loop
}
}
}
}
d.addToRecommended(r, uid, ret)
d.saveOffset(r, uid, currentOffset)
} else {
candidateLen, err = redis.Int(r.Do("LLEN", candidateKey))
if err != nil {
return
}
var offset int
offset, _ = redis.Int(r.Do("GET", fmt.Sprintf(_recommendOffsetKey, uid)))
if offset > (candidateLen - 1) {
return
}
var currentOffset = offset
Loop2:
for len(ret) < reqCount && currentOffset < candidateLen {
var ids []int64
ids, err = redis.Int64s(r.Do("LRANGE", candidateKey, currentOffset, currentOffset+reqCount-1))
if err != nil {
err = errors.WithStack(err)
return
}
// 去重
for _, id := range ids {
currentOffset++
_, ok := existMap[id]
if !ok {
ret = append(ret, id)
if len(ret) >= int(reqCount) {
break Loop2
}
}
}
if len(ids) == 0 {
log.Error("Cannot get recommend candidate, key=%s, offset=%d, count=%d", candidateKey, offset, reqCount)
break
}
}
d.addToRecommended(r, uid, ret)
d.saveOffset(r, uid, currentOffset)
}
return
}
// GetLrRecRoomIds 在GetRandomRoomIds的基础上进行LR计算并返回倒排的房间号列表
// 与GetRandomRoomIds有相同的输入输出结构
func (d *Dao) GetLrRecRoomIds(r redis.Conn, uid int64, candidateIds []int64) (ret []int64, err error) {
var areas string
areaIds := map[int64]struct{}{}
areas, err = redis.String(r.Do("GET", fmt.Sprintf(recconst.UserAreaKey, uid)))
if err != nil && err != redis.ErrNil {
log.Error("redis GET error: %v", err)
return
}
err = nil
if areas != "" {
split := strings.Split(areas, ";")
for _, areaIdStr := range split {
areaId, _ := strconv.ParseInt(areaIdStr, 10, 64)
areaIds[areaId] = struct{}{}
}
}
weightVector := makeWeightVec(d.c)
roomFeatures, ok := roomFeatureValue.Load().(map[int64][]int64)
if !ok {
ret = candidateIds
return
}
roomScoreSlice := ScoreSlice{}
for _, roomId := range candidateIds {
if fv, ok := roomFeatures[roomId]; ok {
featureVector := make([]int64, len(fv))
copy(featureVector, fv)
areaId := featureVector[0]
if _, ok := areaIds[areaId]; ok {
featureVector[0] = 1
} else {
featureVector[0] = 0
}
counter := Counter{roomId: roomId, score: calcScore(weightVector, featureVector)}
roomScoreSlice = append(roomScoreSlice, counter)
}
}
sort.Sort(roomScoreSlice)
for _, counter := range roomScoreSlice {
ret = append(ret, counter.roomId)
}
return
}
// generateCandidateList 得到候选集
func (d *Dao) generateCandidateList(r redis.Conn, uid int64, candidateKey string) (ret []int64, err error) {
// 第一步 itemcf优先级最高。
itemCFKey := fmt.Sprintf(recconst.UserItemCFRecKey, uid)
var itemCFList []int64
itemCFList, err = redis.Int64s(r.Do("ZREVRANGE", itemCFKey, 0, -1))
if err != nil {
err = errors.WithStack(err)
return
}
itemCFOnlineIds := d.FilterOnlineRoomIds(itemCFList)
if len(itemCFOnlineIds) == 0 {
log.Info("No item-cf room online for user, uid=%d, before online filter room ids: %+v", uid, itemCFList)
}
// 第二步 取兴趣分区的房间 人气超过100的房间
var areas string
areas, err = redis.String(r.Do("GET", fmt.Sprintf(recconst.UserAreaKey, uid)))
if err != nil && err != redis.ErrNil {
err = errors.WithStack(err)
return
}
err = nil
var areaRoomIDs []int64
if areas != "" {
split := strings.Split(areas, ";")
for _, areaIdStr := range split {
areaId, _ := strconv.ParseInt(areaIdStr, 10, 64)
var ids = d.getAreaRoomIds(areaId)
areaRoomIDs = append(areaRoomIDs, ids...)
}
}
// 第三步 取兴趣分区大分区的100个 先不做
// 第四步 减去已经推荐过的
day := time.Now().Format("20060102")
var recommendedList []int64
edKey := fmt.Sprintf(_recommendedKey, uid, day)
recommendedList, err = redis.Int64s(r.Do("SMEMBERS", edKey))
if err != nil {
err = errors.WithStack(err)
return
}
recommended := map[int64]struct{}{}
for _, id := range recommendedList {
recommended[id] = struct{}{}
}
var itemCFFinalIDs []int64
for _, id := range itemCFOnlineIds {
_, exist := recommended[id]
if !exist {
itemCFFinalIDs = append(itemCFFinalIDs, id)
}
}
var areaRoomFinalIDs []int64
for _, id := range areaRoomIDs {
_, exist := recommended[id]
if !exist {
areaRoomFinalIDs = append(areaRoomFinalIDs, id)
}
}
ret = mergeArr(itemCFFinalIDs, areaRoomFinalIDs)
log.Info("UserRecommend : uid=%d total=%d, "+
"itemcf.original=%d, itemcf.online=%d, itemcf.noviewd=%d, "+
"areaRoom.original=%d, itemcf.noviewd=%d viewed=%d",
uid, len(ret), len(itemCFList), len(itemCFOnlineIds), len(itemCFFinalIDs),
len(areaRoomIDs), len(areaRoomFinalIDs), len(recommendedList))
return
}
// generateCandidateList 得到进过LR的候选集
func (d *Dao) generateLrCandidateList(r redis.Conn, uid int64, candidateKey string) (ret []int64, err error) {
roomIDs, err := d.generateCandidateList(r, uid, candidateKey)
if err != nil {
log.Error("generateLrCandidateList failed 1, error:%v", err)
return
}
if len(ret) > 0 {
ret, err = d.GetLrRecRoomIds(r, uid, roomIDs)
if err != nil {
log.Error("generateLrCandidateList failed 2, error:%v", err)
return
}
}
// 召回源不足的情况下补足推荐房间数
if len(ret) < 150 {
ids, ok := recDefaultRoomIds.Load().([]int64)
if !ok {
return
}
ret1, err1 := d.GetLrRecRoomIds(r, uid, ids)
if err1 != nil {
log.Error("generateLrCandidateList failed 3, error:%v", err1)
return
}
ret = mergeArrWithOrder(ret, ret1, 150) // TODO:当前ret1的结果是没有过滤掉今天看过的房间的, 看后面是否需要优化
}
{
for _, roomID := range ret {
r.Send("RPUSH", candidateKey, roomID)
}
r.Send("EXPIRE", candidateKey, 60*2)
err = r.Flush()
if err != nil {
err = errors.WithStack(err)
return
}
for i := 0; i < len(ret)+1; i++ {
r.Receive()
}
}
return
}
// Ping dao ping
func (d *Dao) Ping(ctx context.Context) (err error) {
conn := d.redis.Get(ctx)
defer conn.Close()
_, err = conn.Do("ping")
if err != nil {
err = errors.Wrap(err, "dao Ping err")
}
return err
}
// Counter 房间-分数结构体, 用于构建一个可排序的slice
type Counter struct {
roomId int64
score float32
}
// ScoreSlice Counter对象的slice
type ScoreSlice []Counter
func (s ScoreSlice) Len() int {
return len(s)
}
func (s ScoreSlice) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s ScoreSlice) Less(i, j int) bool {
return s[j].score < s[i].score
}
func calcScore(weightVector []float32, featureVector []int64) (score float32) {
if len(weightVector) != len(featureVector) {
panic(fmt.Sprintf("权重数量和特征数量不匹配, 请检查配置或逻辑, weight: %+v, feature: %+v", weightVector, featureVector))
}
for i := 0; i < min(len(weightVector), len(featureVector)); i++ {
score += weightVector[i] * float32(featureVector[i])
}
return
}
func min(x int, y int) int {
if x < y {
return x
}
return y
}
// 合并两个集合
func mergeArr(x []int64, y []int64) (ret []int64) {
tmpMap := map[int64]struct{}{}
for _, id := range x {
tmpMap[id] = struct{}{}
}
for _, id := range y {
tmpMap[id] = struct{}{}
}
for id := range tmpMap {
ret = append(ret, id)
}
return
}
// 按x, y的顺序合并两个集合, 当x的长度不小于limit则直接返回
func mergeArrWithOrder(x []int64, y []int64, limit int) (ret []int64) {
if len(x) >= limit {
ret = x
return
}
tmpMap := map[int64]struct{}{}
ret = append(ret, x...)
num := len(ret)
for _, id := range x {
tmpMap[id] = struct{}{}
}
for _, id := range y {
if _, ok := tmpMap[id]; ok {
continue
}
num += 1
tmpMap[id] = struct{}{}
ret = append(ret, id)
if num >= limit {
break
}
}
return
}
func makeWeightVec(c *conf.Config) (ret []float32) {
ret = append(ret, c.CommonFeature.UserAreaInterest.Weights...)
ret = append(ret, c.CommonFeature.FansNum.Weights...)
ret = append(ret, c.CommonFeature.CornerSign.Weights...)
ret = append(ret, c.CommonFeature.Online.Weights...)
return
}