ulid/main.go

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"log"
	"math/rand"
	"slices"
	"time"

	"github.com/oklog/ulid"
)

// ULID spec is mirrored here:
// https://git.wisellama.rocks/Mirrors/ulid-spec

// main is a scratch driver. It builds a ULID for a fixed timestamp
// with the oklog/ulid library and again with the local NewULID, using
// identically seeded random sources for both, and logs the
// intermediate values (presumably so the two results can be compared
// by eye).
func main() {
	// Timestamp under test. The Unix epoch is used here; swap in any
	// other time (for example
	// time.Date(2024, 02, 16, 14, 02, 15, 17, time.UTC)) to try a
	// different value.
	t := time.Unix(0, 0)
	ms := uint64(t.UnixMilli())
	msBytes, err := GetMSBytes(t)
	if err != nil {
		log.Fatal(err)
	}

	// Format the slice as a whole: %X on a byte slice prints two hex
	// digits per byte, so bytes below 0x10 keep their leading zero.
	log.Printf("%X", msBytes)

	seed := int64(0)
	entropy := rand.New(rand.NewSource(seed))

	u, err := ulid.New(ms, entropy)
	if err != nil {
		log.Fatal(err)
	}

	ub, err := u.MarshalBinary()
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%X", ub)
	log.Printf("%v", u.String())
	log.Printf("%016X", u.Time())

	log.Printf("%016X", ms)

	// Re-seed so the local implementation draws the same random
	// stream that the library call above consumed.
	entropy = rand.New(rand.NewSource(seed))
	ulidBytes, err := NewULID(t, entropy)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%X", ulidBytes)

	ulidString := CrockfordEncode(ulidBytes)
	log.Printf("ULID string: %v", ulidString)
}

// NewULIDString creates a new ULID and returns it as a string
// produced by CrockfordEncode. The arguments are passed straight to
// NewULID; if NewULID fails, its error is returned unchanged together
// with an empty string.
func NewULIDString(t time.Time, entropy io.Reader) (string, error) {
	raw, err := NewULID(t, entropy)
	if err != nil {
		return "", err
	}
	return CrockfordEncode(raw), nil
}

// NewULID creates a new ULID and returns its 16 raw bytes.
//
// A ULID is a 128-bit (16-byte) value similar to a UUID (and
// compatible with UUIDs because of this). The first 48 bits (6 bytes)
// are the timestamp t in Unix milliseconds, as produced by
// GetMSBytes. The remaining 80 bits (10 bytes) are read from entropy.
// The monotonicity part of the ULID spec is intentionally not
// implemented: ULIDs created during the same millisecond just receive
// independent random values with no ordering guarantee.
//
// An error is returned if entropy is nil, if 10 bytes cannot be read
// from it, or if GetMSBytes rejects the timestamp.
func NewULID(t time.Time, entropy io.Reader) ([]byte, error) {
	if entropy == nil {
		return nil, errors.New("entropy was nil")
	}

	// io.Reader permits Read to return fewer bytes than requested
	// without an error, so use ReadFull to guarantee that all 10
	// random bytes are actually filled in.
	randomBytes := make([]byte, 10)
	if _, err := io.ReadFull(entropy, randomBytes); err != nil {
		return nil, fmt.Errorf("failed to read bytes from entropy source: %w", err)
	}

	msBytes, err := GetMSBytes(t)
	if err != nil {
		return nil, err
	}
	if len(msBytes) != 6 {
		return nil, errors.New("timestamp bytes are wrong")
	}

	// Timestamp first, then randomness.
	ulidBytes := make([]byte, 0, 16)
	ulidBytes = append(ulidBytes, msBytes...)
	ulidBytes = append(ulidBytes, randomBytes...)

	return ulidBytes, nil
}

// GetMSBytes converts t to Unix milliseconds and returns the value as
// 6 big-endian bytes (48 bits), the width of the ULID timestamp
// field. According to the ULID spec, 48 bits is enough room that we
// won't run out of space until 10889 AD.
//
// A "time overflow" error is returned when the millisecond count does
// not fit in 48 bits. The count is converted to an unsigned value
// first, so times before the Unix epoch have their high bits set and
// are rejected the same way.
func GetMSBytes(t time.Time) ([]byte, error) {
	const timestampBits = 48

	millis := uint64(t.UnixMilli())
	if millis>>timestampBits != 0 {
		return nil, errors.New("time overflow")
	}

	// Serialize all 64 bits big-endian; the two leading bytes are
	// known to be zero here, so skipping them leaves the 48-bit value.
	var wide [8]byte
	binary.BigEndian.PutUint64(wide[:], millis)
	return wide[2:], nil
}

var (
	// crockfordEncodeMap maps each 5-bit value (0-31) to its
	// Crockford base32 symbol. The symbols are the digits followed by
	// the upper-case letters with I, L, O and U left out.
	crockfordEncodeMap = func() map[uint8]rune {
		const alphabet = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"

		table := make(map[uint8]rune, len(alphabet))
		for value, symbol := range alphabet {
			table[uint8(value)] = symbol
		}
		return table
	}()

	// crockfordDecodeMap maps a character to the 5-bit value it
	// stands for in Crockford base32. Letters are accepted in either
	// case, 'O'/'o' decode as 0, and 'I'/'i'/'L'/'l' decode as 1.
	// 'U' has no entry.
	crockfordDecodeMap = func() map[rune]uint8 {
		const alphabet = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"

		table := make(map[rune]uint8, 60)
		for position, symbol := range alphabet {
			value := uint8(position)
			table[symbol] = value
			if symbol >= 'A' && symbol <= 'Z' {
				// Same value for the lower-case form of the letter.
				table[symbol+('a'-'A')] = value
			}
		}

		// Look-alike characters that are not part of the alphabet.
		for _, zeroAlias := range "Oo" {
			table[zeroAlias] = 0
		}
		for _, oneAlias := range "IiLl" {
			table[oneAlias] = 1
		}
		return table
	}()
)

// CrockfordEncode encodes bytes as a Crockford base32 string, one
// character per 5 bits. The input is treated as a single big-endian
// number ("network byte ordering", as the ULID spec requires).
//
// When the number of input bits is not a multiple of 5, the most
// significant (left-most) character is filled out with zero bits. A
// 16-byte ULID is 128 bits, so it is encoded as 130 bits with two
// leading zero bits; this is why the spec's maximum ULID is
// `7ZZZZZZZZZZZZZZZZZZZZZZZZZ` rather than all Z's.
//
// An empty input produces an empty string.
//
// https://www.crockford.com/base32.html
// https://git.wisellama.rocks/Mirrors/ulid-spec
func CrockfordEncode(bytes []byte) string {
	if len(bytes) == 0 {
		return ""
	}

	const (
		symbolBits = 5                 // 2^5 = 32 symbols
		symbolMask = 1<<symbolBits - 1 // low five bits
	)

	// One symbol per complete 5-bit group, plus one for any leftover.
	totalBits := len(bytes) * 8
	symbols := make([]rune, 0, (totalBits+symbolBits-1)/symbolBits)

	// Walk from the least significant byte upwards. Each byte is
	// stacked on top of the bits still pending, and complete 5-bit
	// groups are peeled off the bottom. At most 4 bits are pending
	// when a byte is added, so 16 bits is wide enough.
	var pending uint16
	pendingBits := 0
	for i := len(bytes) - 1; i >= 0; i-- {
		pending |= uint16(bytes[i]) << pendingBits
		pendingBits += 8

		for pendingBits >= symbolBits {
			symbols = append(symbols, crockfordEncodeMap[uint8(pending&symbolMask)])
			pending >>= symbolBits
			pendingBits -= symbolBits
		}
	}

	// Whatever is left is the top group; its missing high bits are
	// already zero.
	if pendingBits > 0 {
		symbols = append(symbols, crockfordEncodeMap[uint8(pending)])
	}

	// Symbols were collected least significant first.
	slices.Reverse(symbols)

	return string(symbols)
}