/*
 * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this
 *       list of conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

package lz4

import (
	"errors"
	"sync"
)

const (
	// minMatch is the length in bytes of the sequence that is hashed and
	// compared when looking for a match.
	minMatch = 4
	// hashLog is the base-2 logarithm of the number of hash table slots.
	hashLog = 16
	// hashTableSize is the number of slots in the match-finder hash table.
	hashTableSize = 1 << hashLog
	// hashShift reduces the 32-bit multiplicative hash to hashLog bits.
	hashShift = (minMatch * 8) - hashLog
	// incompressible is the initial number of unmatched bytes after which
	// the scan step starts to grow.
	incompressible uint32 = 128
	// uninitHash biases the positions stored in the hash table so that a
	// zeroed slot decodes to a reference outside the match window.
	uninitHash = 0x88888888

	mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes.

	// MaxInputSize is the largest buffer that can be compressed in a single block.
	MaxInputSize = 0x7E000000
)

var (
 | 
						|
	// ErrTooLarge indicates the input buffer was too large
 | 
						|
	ErrTooLarge       = errors.New("input too large")
 | 
						|
	ErrEncodeTooSmall = errors.New("encode buffer too small")
 | 
						|
 | 
						|
	hashPool = sync.Pool{
 | 
						|
		New: func() interface{} {
 | 
						|
			return make([]uint32, hashTableSize)
 | 
						|
		},
 | 
						|
	}
 | 
						|
)
 | 
						|
 | 
						|
// encoder holds the working state of a single Encode call.
type encoder struct {
	src       []byte   // input being compressed
	dst       []byte   // output buffer receiving the block
	hashTable []uint32 // biased src positions of earlier 4-byte sequences, indexed by hash
	pos       uint32   // current scan offset into src
	anchor    uint32   // offset in src of the first literal not yet written to dst
	dpos      uint32   // next write offset into dst
}

// CompressBound returns the maximum length of a lz4 block
 | 
						|
func CompressBound(isize int) int {
 | 
						|
	if isize > MaxInputSize {
 | 
						|
		return 0
 | 
						|
	}
 | 
						|
	return isize + ((isize) / 255) + 16
 | 
						|
}
 | 
						|
 | 
						|
func (e *encoder) writeLiterals(length, mlLen, pos uint32) {
 | 
						|
 | 
						|
	ln := length
 | 
						|
 | 
						|
	var code byte
 | 
						|
	if ln > runMask-1 {
 | 
						|
		code = runMask
 | 
						|
	} else {
 | 
						|
		code = byte(ln)
 | 
						|
	}
 | 
						|
 | 
						|
	if mlLen > mlMask-1 {
 | 
						|
		e.dst[e.dpos] = (code << mlBits) + byte(mlMask)
 | 
						|
	} else {
 | 
						|
		e.dst[e.dpos] = (code << mlBits) + byte(mlLen)
 | 
						|
	}
 | 
						|
	e.dpos++
 | 
						|
 | 
						|
	if code == runMask {
 | 
						|
		ln -= runMask
 | 
						|
		for ; ln > 254; ln -= 255 {
 | 
						|
			e.dst[e.dpos] = 255
 | 
						|
			e.dpos++
 | 
						|
		}
 | 
						|
 | 
						|
		e.dst[e.dpos] = byte(ln)
 | 
						|
		e.dpos++
 | 
						|
	}
 | 
						|
 | 
						|
	for ii := uint32(0); ii < length; ii++ {
 | 
						|
		e.dst[e.dpos+ii] = e.src[pos+ii]
 | 
						|
	}
 | 
						|
 | 
						|
	e.dpos += length
 | 
						|
}
 | 
						|
 | 
						|
// Encode returns the encoded form of src.  The returned array may be a
 | 
						|
// sub-slice of dst if it was large enough to hold the entire output.
 | 
						|
func Encode(dst, src []byte) (compressedSize int, error error) {
 | 
						|
	if len(src) >= MaxInputSize {
 | 
						|
		return 0, ErrTooLarge
 | 
						|
	}
 | 
						|
 | 
						|
	if n := CompressBound(len(src)); len(dst) < n {
 | 
						|
		return 0, ErrEncodeTooSmall
 | 
						|
	}
 | 
						|
 | 
						|
	hashTable := hashPool.Get().([]uint32)
 | 
						|
	for i := range hashTable {
 | 
						|
		hashTable[i] = 0
 | 
						|
	}
 | 
						|
	e := encoder{src: src, dst: dst, hashTable: hashTable}
 | 
						|
	defer func() {
 | 
						|
		hashPool.Put(hashTable)
 | 
						|
	}()
 | 
						|
	// binary.LittleEndian.PutUint32(dst, uint32(len(src)))
 | 
						|
	// e.dpos = 0
 | 
						|
 | 
						|
	var (
 | 
						|
		step  uint32 = 1
 | 
						|
		limit        = incompressible
 | 
						|
	)
 | 
						|
 | 
						|
	for {
 | 
						|
		if int(e.pos)+12 >= len(e.src) {
 | 
						|
			e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor)
 | 
						|
			return int(e.dpos), nil
 | 
						|
		}
 | 
						|
 | 
						|
		sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0])
 | 
						|
 | 
						|
		hash := (sequence * 2654435761) >> hashShift
 | 
						|
		ref := e.hashTable[hash] + uninitHash
 | 
						|
		e.hashTable[hash] = e.pos - uninitHash
 | 
						|
 | 
						|
		if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence {
 | 
						|
			if e.pos-e.anchor > limit {
 | 
						|
				limit <<= 1
 | 
						|
				step += 1 + (step >> 2)
 | 
						|
			}
 | 
						|
			e.pos += step
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if step > 1 {
 | 
						|
			e.hashTable[hash] = ref - uninitHash
 | 
						|
			e.pos -= step - 1
 | 
						|
			step = 1
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		limit = incompressible
 | 
						|
 | 
						|
		ln := e.pos - e.anchor
 | 
						|
		back := e.pos - ref
 | 
						|
 | 
						|
		anchor := e.anchor
 | 
						|
 | 
						|
		e.pos += minMatch
 | 
						|
		ref += minMatch
 | 
						|
		e.anchor = e.pos
 | 
						|
 | 
						|
		for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] {
 | 
						|
			e.pos++
 | 
						|
			ref++
 | 
						|
		}
 | 
						|
 | 
						|
		mlLen := e.pos - e.anchor
 | 
						|
 | 
						|
		e.writeLiterals(ln, mlLen, anchor)
 | 
						|
		e.dst[e.dpos] = uint8(back)
 | 
						|
		e.dst[e.dpos+1] = uint8(back >> 8)
 | 
						|
		e.dpos += 2
 | 
						|
 | 
						|
		if mlLen > mlMask-1 {
 | 
						|
			mlLen -= mlMask
 | 
						|
			for mlLen > 254 {
 | 
						|
				mlLen -= 255
 | 
						|
 | 
						|
				e.dst[e.dpos] = 255
 | 
						|
				e.dpos++
 | 
						|
			}
 | 
						|
 | 
						|
			e.dst[e.dpos] = byte(mlLen)
 | 
						|
			e.dpos++
 | 
						|
		}
 | 
						|
 | 
						|
		e.anchor = e.pos
 | 
						|
	}
 | 
						|
}
 |