Re-organize code into an actual library

main
Wisellama 2024-02-20 22:15:17 -08:00
parent 9edb7e20c7
commit 91b8b60e16
8 changed files with 302 additions and 340 deletions

105
crockford32/crockford32.go Normal file
View File

@ -0,0 +1,105 @@
package crockford32
import "slices"
// CrockfordEncode encodes a byte slice as a Crockford base 32 string.
//
// The input is treated as a single big-endian ("network byte order") bit
// string, which is what the ULID spec requires. Five-bit groups are taken
// starting from the least significant bit, so when the bit length is not
// a multiple of 5 the first output character carries the leftover
// high-order bits, zero-padded on the left. A 16-byte ULID (128 bits)
// therefore encodes to 26 characters whose first character is at most
// '7', which is why the largest ULID is `7ZZZZZZZZZZZZZZZZZZZZZZZZZ`
// rather than all Z's.
//
// An empty or nil input returns the empty string.
//
// https://www.crockford.com/base32.html
// https://git.wisellama.rocks/Mirrors/ulid-spec
func CrockfordEncode(bytes []byte) string {
	const (
		// alphabet is indexed by the 5-bit value being encoded.
		alphabet    = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
		bitsPerChar = 5
		charMask    = 1<<bitsPerChar - 1
	)

	if len(bytes) == 0 {
		return ""
	}

	// One character per 5 bits, rounded up to make room for a partial
	// leading group.
	output := make([]byte, 0, (len(bytes)*8+bitsPerChar-1)/bitsPerChar)

	// Walk the input right to left, pushing each byte on top of the bits
	// that are still waiting to be emitted. At most 4 bits are left over
	// before a new byte is added, so the accumulator never holds more
	// than 12 bits and a uint16 is wide enough.
	var pending uint16
	pendingBits := 0
	for i := len(bytes) - 1; i >= 0; i-- {
		pending |= uint16(bytes[i]) << pendingBits
		pendingBits += 8
		for pendingBits >= bitsPerChar {
			output = append(output, alphabet[pending&charMask])
			pending >>= bitsPerChar
			pendingBits -= bitsPerChar
		}
	}

	// Whatever is left is the most significant, zero-padded group.
	if pendingBits > 0 {
		output = append(output, alphabet[pending])
	}

	// Characters were produced least-significant first; flip them into
	// reading order.
	slices.Reverse(output)
	return string(output)
}

View File

@ -0,0 +1,105 @@
package crockford32
var (
	// crockfordEncodeMap maps each 5-bit value (0-31) to its Crockford
	// base32 character. It is generated from the alphabet string, whose
	// index is the value being encoded.
	crockfordEncodeMap = func() map[uint8]rune {
		const symbols = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
		table := make(map[uint8]rune, len(symbols))
		for value, symbol := range symbols {
			table[uint8(value)] = symbol
		}
		return table
	}()

	// crockfordDecodeMap maps characters back to their 5-bit value. Every
	// letter is accepted in upper and lower case, 'O'/'o' decode as 0 and
	// 'I'/'i'/'L'/'l' decode as 1.
	crockfordDecodeMap = func() map[rune]uint8 {
		const symbols = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
		table := make(map[rune]uint8, 60)
		for value, symbol := range symbols {
			table[symbol] = uint8(value)
			if symbol >= 'A' && symbol <= 'Z' {
				// Lower-case twin of the same letter.
				table[symbol+('a'-'A')] = uint8(value)
			}
		}
		for _, alias := range "Oo" {
			table[alias] = 0
		}
		for _, alias := range "IiLl" {
			table[alias] = 1
		}
		return table
	}()
)

View File

@ -1,4 +1,4 @@
package main
package crockford32
import (
"testing"

4
go.mod
View File

@ -1,5 +1,3 @@
module example.com/ulid
module git.wisellama.rocks/Wisellama/ulid
go 1.21.5
require github.com/oklog/ulid v1.3.1

2
go.sum
View File

@ -1,2 +0,0 @@
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=

333
main.go
View File

@ -1,333 +0,0 @@
package main
import (
"encoding/binary"
"errors"
"fmt"
"io"
"log"
"math/rand"
"slices"
"time"
"github.com/oklog/ulid"
)
// ULID spec is mirrored here:
// https://git.wisellama.rocks/Mirrors/ulid-spec
// main is a scratch driver: it builds a ULID for the same timestamp and
// seed with both github.com/oklog/ulid and this package's NewULID, and
// logs each result so the two can be compared by eye.
func main() {
	// The 2024 date is immediately overridden by the Unix epoch, so the
	// epoch is the timestamp actually used below.
	stamp := time.Date(2024, time.February, 16, 14, 2, 15, 17, time.UTC)
	stamp = time.Unix(0, 0)
	millis := uint64(stamp.UnixMilli())

	tsBytes, err := GetMSBytes(stamp)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%X%X%X%X%X%X", tsBytes[0], tsBytes[1], tsBytes[2], tsBytes[3], tsBytes[4], tsBytes[5])

	// Reference implementation, seeded deterministically.
	const seed = int64(0)
	reference, err := ulid.New(millis, rand.New(rand.NewSource(seed)))
	if err != nil {
		log.Fatal(err)
	}
	referenceBytes, _ := reference.MarshalBinary()
	log.Printf("%X", referenceBytes)
	log.Printf("%v", reference.String())
	log.Printf("%016X", reference.Time())
	log.Printf("%016X", millis)

	// Our implementation, fed a fresh source built from the same seed.
	ours, err := NewULID(stamp, rand.New(rand.NewSource(seed)))
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%X", ours)
	log.Printf("ULID string: %v", CrockfordEncode(ours))
}
// NewULIDString creates a new ULID and returns it as a Crockford base32
// encoded string. Any error from NewULID is returned unchanged together
// with an empty string. See NewULID for more details.
func NewULIDString(t time.Time, entropy io.Reader) (string, error) {
	raw, err := NewULID(t, entropy)
	if err != nil {
		return "", err
	}
	return CrockfordEncode(raw), nil
}
// NewULID creates a new ULID and returns its 16 raw bytes.
//
// A ULID is a 128-bit (16-byte) value similar to a UUID (and
// compatible with UUIDs because of this). The first 48 bits (6 bytes)
// are the timestamp t in Unix milliseconds, big-endian. The remaining
// 80 bits (10 bytes) are read from entropy. The monotonicity part of
// the ULID spec is not implemented: ULIDs created during the same
// millisecond just receive independent random values with no ordering
// guarantee.
//
// An error is returned if entropy is nil, if 10 bytes cannot be read
// from it, or if t does not fit in 48 bits of milliseconds (see
// GetMSBytes).
func NewULID(t time.Time, entropy io.Reader) ([]byte, error) {
	const (
		timestampLen = 6
		randomLen    = 10
	)

	if entropy == nil {
		return nil, errors.New("entropy was nil")
	}

	// io.Reader.Read is allowed to return fewer bytes than requested
	// without an error, which would silently leave part of the random
	// section zeroed. io.ReadFull keeps reading until the buffer is full
	// or the source fails.
	randomBytes := make([]byte, randomLen)
	if _, err := io.ReadFull(entropy, randomBytes); err != nil {
		return nil, fmt.Errorf("failed to read bytes from entropy source: %w", err)
	}

	msBytes, err := GetMSBytes(t)
	if err != nil {
		return nil, err
	}
	if len(msBytes) != timestampLen {
		return nil, errors.New("timestamp bytes are wrong")
	}

	// Timestamp first, then randomness.
	ulidBytes := make([]byte, 0, timestampLen+randomLen)
	ulidBytes = append(ulidBytes, msBytes...)
	ulidBytes = append(ulidBytes, randomBytes...)
	return ulidBytes, nil
}
// GetMSBytes returns the given Unix time in milliseconds as a 6-byte
// array. It truncates the 64-bit Unix epoch time down to 48-bits (6
// bytes) and returns that 6 byte array. According to the ULID spec,
// 48-bits is enough room that we won't run out of space until 10889
// AD.
func GetMSBytes(t time.Time) ([]byte, error) {
ms := uint64(t.UnixMilli())
// Put the 64-bit int into a byte array
bytes := make([]byte, 8)
binary.BigEndian.PutUint64(bytes, ms)
if bytes[0] != 0 || bytes[1] != 0 {
return nil, errors.New("time overflow")
}
// Chop off the first 2 bytes (16 bits) to get the 6 byte (48-bit)
// output.
return bytes[2:], nil
}
var (
	// crockfordEncodeMap maps each 5-bit value (0-31) to its Crockford
	// base32 character. It is generated from the alphabet string, whose
	// index is the value being encoded.
	crockfordEncodeMap = func() map[uint8]rune {
		const symbols = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
		table := make(map[uint8]rune, len(symbols))
		for value, symbol := range symbols {
			table[uint8(value)] = symbol
		}
		return table
	}()

	// crockfordDecodeMap maps characters back to their 5-bit value. Every
	// letter is accepted in upper and lower case, 'O'/'o' decode as 0 and
	// 'I'/'i'/'L'/'l' decode as 1.
	crockfordDecodeMap = func() map[rune]uint8 {
		const symbols = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
		table := make(map[rune]uint8, 60)
		for value, symbol := range symbols {
			table[symbol] = uint8(value)
			if symbol >= 'A' && symbol <= 'Z' {
				// Lower-case twin of the same letter.
				table[symbol+('a'-'A')] = uint8(value)
			}
		}
		for _, alias := range "Oo" {
			table[alias] = 0
		}
		for _, alias := range "IiLl" {
			table[alias] = 1
		}
		return table
	}()
)
// CrockfordEncode encodes a byte slice as a Crockford base 32 string.
//
// The input is treated as a single big-endian ("network byte order") bit
// string, which is what the ULID spec requires. Five-bit groups are taken
// starting from the least significant bit, so when the bit length is not
// a multiple of 5 the first output character carries the leftover
// high-order bits, zero-padded on the left. A 16-byte ULID (128 bits)
// therefore encodes to 26 characters whose first character is at most
// '7', which is why the largest ULID is `7ZZZZZZZZZZZZZZZZZZZZZZZZZ`
// rather than all Z's.
//
// An empty or nil input returns the empty string.
//
// https://www.crockford.com/base32.html
// https://git.wisellama.rocks/Mirrors/ulid-spec
func CrockfordEncode(bytes []byte) string {
	const (
		// alphabet is indexed by the 5-bit value being encoded.
		alphabet    = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
		bitsPerChar = 5
		charMask    = 1<<bitsPerChar - 1
	)

	if len(bytes) == 0 {
		return ""
	}

	// One character per 5 bits, rounded up to make room for a partial
	// leading group.
	output := make([]byte, 0, (len(bytes)*8+bitsPerChar-1)/bitsPerChar)

	// Walk the input right to left, pushing each byte on top of the bits
	// that are still waiting to be emitted. At most 4 bits are left over
	// before a new byte is added, so the accumulator never holds more
	// than 12 bits and a uint16 is wide enough.
	var pending uint16
	pendingBits := 0
	for i := len(bytes) - 1; i >= 0; i-- {
		pending |= uint16(bytes[i]) << pendingBits
		pendingBits += 8
		for pendingBits >= bitsPerChar {
			output = append(output, alphabet[pending&charMask])
			pending >>= bitsPerChar
			pendingBits -= bitsPerChar
		}
	}

	// Whatever is left is the most significant, zero-padded group.
	if pendingBits > 0 {
		output = append(output, alphabet[pending])
	}

	// Characters were produced least-significant first; flip them into
	// reading order.
	slices.Reverse(output)
	return string(output)
}

89
ulid.go Normal file
View File

@ -0,0 +1,89 @@
package ulid
import (
"encoding/binary"
"errors"
"fmt"
"io"
"time"
"git.wisellama.rocks/Wisellama/ulid/crockford32"
)
// The ULID spec is mirrored here:
// https://git.wisellama.rocks/Mirrors/ulid-spec
// NewULIDString creates a new ULID and returns it as a Crockford base32
// encoded string. Any error from NewULID is returned unchanged together
// with an empty string. See NewULID for more details.
func NewULIDString(t time.Time, entropy io.Reader) (string, error) {
	raw, err := NewULID(t, entropy)
	if err != nil {
		return "", err
	}
	return crockford32.CrockfordEncode(raw), nil
}
// NewULID creates a new ULID and returns its 16 raw bytes.
//
// A ULID is a 128-bit (16-byte) value similar to a UUID (and
// compatible with UUIDs because of this). The first 48 bits (6 bytes)
// are the timestamp t in Unix milliseconds, big-endian. The remaining
// 80 bits (10 bytes) are read from entropy. The monotonicity part of
// the ULID spec is not implemented: ULIDs created during the same
// millisecond just receive independent random values with no ordering
// guarantee.
//
// An error is returned if entropy is nil, if 10 bytes cannot be read
// from it, or if t does not fit in 48 bits of milliseconds (see
// GetMSBytes).
func NewULID(t time.Time, entropy io.Reader) ([]byte, error) {
	const (
		timestampLen = 6
		randomLen    = 10
	)

	if entropy == nil {
		return nil, errors.New("entropy was nil")
	}

	// io.Reader.Read is allowed to return fewer bytes than requested
	// without an error, which would silently leave part of the random
	// section zeroed. io.ReadFull keeps reading until the buffer is full
	// or the source fails.
	randomBytes := make([]byte, randomLen)
	if _, err := io.ReadFull(entropy, randomBytes); err != nil {
		return nil, fmt.Errorf("failed to read bytes from entropy source: %w", err)
	}

	msBytes, err := GetMSBytes(t)
	if err != nil {
		return nil, err
	}
	if len(msBytes) != timestampLen {
		return nil, errors.New("timestamp bytes are wrong")
	}

	// Timestamp first, then randomness.
	ulidBytes := make([]byte, 0, timestampLen+randomLen)
	ulidBytes = append(ulidBytes, msBytes...)
	ulidBytes = append(ulidBytes, randomBytes...)
	return ulidBytes, nil
}
// GetMSBytes returns the given Unix time in milliseconds as a 6-byte
// array. It truncates the 64-bit Unix epoch time down to 48-bits (6
// bytes) and returns that 6 byte array. According to the ULID spec,
// 48-bits is enough room that we won't run out of space until 10889
// AD.
func GetMSBytes(t time.Time) ([]byte, error) {
ms := uint64(t.UnixMilli())
// Put the 64-bit int into a byte array
bytes := make([]byte, 8)
binary.BigEndian.PutUint64(bytes, ms)
if bytes[0] != 0 || bytes[1] != 0 {
return nil, errors.New("time overflow")
}
// Chop off the first 2 bytes (16 bits) to get the 6 byte (48-bit)
// output.
return bytes[2:], nil
}

View File

@ -1,4 +1,4 @@
package main
package ulid
import (
"fmt"