diff --git a/crockford32/crockford32.go b/crockford32/crockford32.go new file mode 100644 index 0000000..d19053a --- /dev/null +++ b/crockford32/crockford32.go @@ -0,0 +1,105 @@ +package crockford32 + +import "slices" + +// CrockfordEncode takes a byte array and encodes it into a character +// string according to Crockford's base 32 encoding. Every 5-bits +// corresponds to a character. This specific implementation uses Big +// Endian byte order to fit the ULID spec ("network byte ordering") +// +// https://www.crockford.com/base32.html +// https://git.wisellama.rocks/Mirrors/ulid-spec +func CrockfordEncode(bytes []byte) string { + // Crockford is a base 32 encoding. + // 2^5 = 32, so every 5 bits will give us a character. + // + // Each byte is 8 bits, so we'll have to smoosh bytes together to + // get values divisible by 5. Any remainder will be padded with + // zeros. + // + // For ULIDs, we have 128 bits which doesn't evenly divide by 5. + // Technically we'll be encoding 130 bits of information + // (divisible by 5), but the timestamp should always start + // with some zero padding. + // + // According to the spec, this is why the maximum ULID value is + // `7ZZZZZZZZZZZZZZZZZZZZZZZZZ` instead of all Z's. + // The largest supported timestamp is 2^48 - 1. + + if len(bytes) == 0 { + return "" + } + + // Split our byte array up into 5-bit sections and determine how + // much of a remainder we have. + splitSize := len(bytes) * 8 / 5 + remainder := len(bytes) * 8 % 5 + + // Then determine how many uint8's we need to represent these + // bits. 
+ numInts := splitSize + if remainder > 0 { + numInts += 1 + } + intList := make([]uint8, 0, numInts) + + // Go right to left across the bits grabbing each 5-bit chunk + byteIndex := len(bytes) - 1 + bitsRemaining := 8 + bitsNeeded := 5 + currentByte := uint8(bytes[byteIndex]) + for byteIndex >= 0 { + mask := uint8(0b11111) + + // We have all the bits we need + if bitsRemaining > bitsNeeded { + // Just grab what we need and shift down + bitsRemaining -= 5 + newInt := currentByte & mask + currentByte = currentByte >> 5 + intList = append(intList, newInt) + } else { + // Take our remaining bits and then fill in the rest from the next byte + bitsNeeded -= bitsRemaining + oldB := currentByte + byteIndex-- + + // Grab the next byte and shift it upwards + tempB := byte(0) + if byteIndex >= 0 { + tempB = uint8(bytes[byteIndex]) + } + tempB = tempB << bitsRemaining + + // Merge its bits with our remaining bits. + merged := tempB | oldB + newInt := merged & mask + intList = append(intList, newInt) + + // Finally grab the next byte and shift it downwards to + // discard the bits we already used. + if byteIndex >= 0 { + currentByte = uint8(bytes[byteIndex]) + currentByte = currentByte >> bitsNeeded + } + + // Update our tracking values + bitsRemaining = 8 - bitsNeeded + bitsNeeded = 5 + } + } + + // Since we went right-to-left, reverse the list to get our values + // in the correct order. + slices.Reverse(intList) + + // Encode those ints into strings 5-bits at a time. 
+ output := make([]rune, 0, len(intList)) + for _, i := range intList { + lookup := i & 0b11111 + character := crockfordEncodeMap[lookup] + output = append(output, character) + } + + return string(output) +} diff --git a/crockford32/crockford32_maps.go b/crockford32/crockford32_maps.go new file mode 100644 index 0000000..1692df3 --- /dev/null +++ b/crockford32/crockford32_maps.go @@ -0,0 +1,105 @@ +package crockford32 + +var ( + // crockfordEncodeMap takes binary values and converts them to + // characters in Crockford base32. + crockfordEncodeMap = map[uint8]rune{ + 0: '0', + 1: '1', + 2: '2', + 3: '3', + 4: '4', + 5: '5', + 6: '6', + 7: '7', + 8: '8', + 9: '9', + 10: 'A', + 11: 'B', + 12: 'C', + 13: 'D', + 14: 'E', + 15: 'F', + 16: 'G', + 17: 'H', + 18: 'J', + 19: 'K', + 20: 'M', + 21: 'N', + 22: 'P', + 23: 'Q', + 24: 'R', + 25: 'S', + 26: 'T', + 27: 'V', + 28: 'W', + 29: 'X', + 30: 'Y', + 31: 'Z', + } + + // crockfordDecodeMap takes characters and converts them to binary + // values based on Crockford base32. 
+ crockfordDecodeMap = map[rune]uint8{ + '0': 0, + 'O': 0, + 'o': 0, + '1': 1, + 'I': 1, + 'i': 1, + 'L': 1, + 'l': 1, + '2': 2, + '3': 3, + '4': 4, + '5': 5, + '6': 6, + '7': 7, + '8': 8, + '9': 9, + 'A': 10, + 'a': 10, + 'B': 11, + 'b': 11, + 'C': 12, + 'c': 12, + 'D': 13, + 'd': 13, + 'E': 14, + 'e': 14, + 'F': 15, + 'f': 15, + 'G': 16, + 'g': 16, + 'H': 17, + 'h': 17, + 'J': 18, + 'j': 18, + 'K': 19, + 'k': 19, + 'M': 20, + 'm': 20, + 'N': 21, + 'n': 21, + 'P': 22, + 'p': 22, + 'Q': 23, + 'q': 23, + 'R': 24, + 'r': 24, + 'S': 25, + 's': 25, + 'T': 26, + 't': 26, + 'V': 27, + 'v': 27, + 'W': 28, + 'w': 28, + 'X': 29, + 'x': 29, + 'Y': 30, + 'y': 30, + 'Z': 31, + 'z': 31, + } +) diff --git a/crockford_test.go b/crockford32/crockford32_test.go similarity index 98% rename from crockford_test.go rename to crockford32/crockford32_test.go index 0e2a6df..f4e3775 100644 --- a/crockford_test.go +++ b/crockford32/crockford32_test.go @@ -1,4 +1,4 @@ -package main +package crockford32 import ( "testing" diff --git a/go.mod b/go.mod index ebaaa7f..1b256a3 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,3 @@ -module example.com/ulid +module git.wisellama.rocks/Wisellama/ulid go 1.21.5 - -require github.com/oklog/ulid v1.3.1 diff --git a/go.sum b/go.sum index 0c38094..e69de29 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +0,0 @@ -github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= diff --git a/main.go b/main.go deleted file mode 100644 index 7cce268..0000000 --- a/main.go +++ /dev/null @@ -1,333 +0,0 @@ -package main - -import ( - "encoding/binary" - "errors" - "fmt" - "io" - "log" - "math/rand" - "slices" - "time" - - "github.com/oklog/ulid" -) - -// ULID spec is mirrored here: -// https://git.wisellama.rocks/Mirrors/ulid-spec - -func main() { - t := time.Date(2024, 02, 16, 14, 02, 15, 17, time.UTC) - t = time.Unix(0, 0) - ms := uint64(t.UnixMilli()) - msBytes, err := 
GetMSBytes(t) - if err != nil { - log.Fatal(err) - } - - log.Printf("%X%X%X%X%X%X", msBytes[0], msBytes[1], msBytes[2], msBytes[3], msBytes[4], msBytes[5]) - - seed := int64(0) - entropy := rand.New(rand.NewSource(seed)) - - u, err := ulid.New(ms, entropy) - if err != nil { - log.Fatal(err) - } - - ub, _ := u.MarshalBinary() - log.Printf("%X", ub) - log.Printf("%v", u.String()) - log.Printf("%016X", u.Time()) - - log.Printf("%016X", ms) - - entropy = rand.New(rand.NewSource(seed)) - ulidBytes, err := NewULID(t, entropy) - if err != nil { - log.Fatal(err) - } - log.Printf("%X", ulidBytes) - - ulidString := CrockfordEncode(ulidBytes) - log.Printf("ULID string: %v", ulidString) - -} - -// NewULIDString create a new ULID and returns its encoded string. -// See NewULID for more details. -func NewULIDString(t time.Time, entropy io.Reader) (string, error) { - bytes, err := NewULID(t, entropy) - if err != nil { - return "", err - } - - s := CrockfordEncode(bytes) - return s, nil -} - -// NewULID creates a new ULID. -// -// A ULID is a 128-bit (16-byte) value similar to a UUID (and -// compatible with UUIDs because of this). The first 48-bits (6 bytes) -// are based on a timestamp. The remaining 80-bits (10 bytes) are -// random. I'm not implementing the monotonicity part of the ULID spec -// because I don't need it. Any ULIDs created during the same -// millisecond will just receive random values with no ordering -// guarantee. 
-func NewULID(t time.Time, entropy io.Reader) ([]byte, error) { - if entropy == nil { - return nil, errors.New("entropy was nil") - } - - randomBytes := make([]byte, 10) - _, err := entropy.Read(randomBytes) - if err != nil { - return nil, fmt.Errorf("failed to read bytes from entropy source: %w", err) - } - - msBytes, err := GetMSBytes(t) - if err != nil { - return nil, err - } - if len(msBytes) != 6 { - return nil, errors.New("timestamp bytes are wrong") - } - if len(randomBytes) != 10 { - return nil, errors.New("random bytes are wrong") - } - - ulidBytes := make([]byte, 0, 16) - for _, b := range msBytes { - ulidBytes = append(ulidBytes, b) - } - for _, b := range randomBytes { - ulidBytes = append(ulidBytes, b) - } - - return ulidBytes, nil -} - -// GetMSBytes returns the given Unix time in milliseconds as a 6-byte -// array. It truncates the 64-bit Unix epoch time down to 48-bits (6 -// bytes) and returns that 6 byte array. According to the ULID spec, -// 48-bits is enough room that we won't run out of space until 10889 -// AD. -func GetMSBytes(t time.Time) ([]byte, error) { - ms := uint64(t.UnixMilli()) - - // Put the 64-bit int into a byte array - bytes := make([]byte, 8) - binary.BigEndian.PutUint64(bytes, ms) - - if bytes[0] != 0 || bytes[1] != 0 { - return nil, errors.New("time overflow") - } - - // Chop off the first 2 bytes (16 bits) to get the 6 byte (48-bit) - // output. - return bytes[2:], nil -} - -var ( - // crockfordEncodeMap takes binary values and converts them to - // characters in Crockford base32. 
- crockfordEncodeMap = map[uint8]rune{ - 0: '0', - 1: '1', - 2: '2', - 3: '3', - 4: '4', - 5: '5', - 6: '6', - 7: '7', - 8: '8', - 9: '9', - 10: 'A', - 11: 'B', - 12: 'C', - 13: 'D', - 14: 'E', - 15: 'F', - 16: 'G', - 17: 'H', - 18: 'J', - 19: 'K', - 20: 'M', - 21: 'N', - 22: 'P', - 23: 'Q', - 24: 'R', - 25: 'S', - 26: 'T', - 27: 'V', - 28: 'W', - 29: 'X', - 30: 'Y', - 31: 'Z', - } - - // crockfordDecodeMap takes characters and converts them to binary - // values based on Crockford base32. - crockfordDecodeMap = map[rune]uint8{ - '0': 0, - 'O': 0, - 'o': 0, - '1': 1, - 'I': 1, - 'i': 1, - 'L': 1, - 'l': 1, - '2': 2, - '3': 3, - '4': 4, - '5': 5, - '6': 6, - '7': 7, - '8': 8, - '9': 9, - 'A': 10, - 'a': 10, - 'B': 11, - 'b': 11, - 'C': 12, - 'c': 12, - 'D': 13, - 'd': 13, - 'E': 14, - 'e': 14, - 'F': 15, - 'f': 15, - 'G': 16, - 'g': 16, - 'H': 17, - 'h': 17, - 'J': 18, - 'j': 18, - 'K': 19, - 'k': 19, - 'M': 20, - 'm': 20, - 'N': 21, - 'n': 21, - 'P': 22, - 'p': 22, - 'Q': 23, - 'q': 23, - 'R': 24, - 'r': 24, - 'S': 25, - 's': 25, - 'T': 26, - 't': 26, - 'V': 27, - 'v': 27, - 'W': 28, - 'w': 28, - 'X': 29, - 'x': 29, - 'Y': 30, - 'y': 30, - 'Z': 31, - 'z': 31, - } -) - -// CrockfordEncode takes a byte array and encodes it into a character -// string according to Crockford's base 32 encoding. Every 5-bits -// corresponds to a character. This specific implementation uses Big -// Endian byte order to fit the ULID spec ("network byte ordering") -// -// https://www.crockford.com/base32.html -// https://git.wisellama.rocks/Mirrors/ulid-spec -func CrockfordEncode(bytes []byte) string { - // Crockford is a base 32 encoding. - // 2^5 = 32, so every 5 bits will give us a character. - // - // Each byte is 8 bits, so we'll have to smoosh bytes together to - // get values divisible by 5. Any remainder will be padded with - // zeros. - // - // For ULIDs, we have 128 bits which doesn't evenly divide by 5. 
- // Technically we'll be encoding 130 bits of information - // (divisible by 5), but the timestamp will should always start - // with some zero padding. - // - // According to the spec, this is why the maximum ULID value is - // `7ZZZZZZZZZZZZZZZZZZZZZZZZZ` instead of all Z's. - // The largest supported timestamp is 2^48 - 1. - - if len(bytes) == 0 { - return "" - } - - // Split our byte array up into 5-bit sections and determine how - // much of a remainder we have. - splitSize := len(bytes) * 8 / 5 - remainder := len(bytes) * 8 % 5 - - // Then determine how many uint8's we need to represent these - // bits. - numInts := splitSize - if remainder > 0 { - numInts += 1 - } - intList := make([]uint8, 0, numInts) - - // Go right to left across the bits grabbing each 5-bit chunk - byteIndex := len(bytes) - 1 - bitsRemaining := 8 - bitsNeeded := 5 - currentByte := uint8(bytes[byteIndex]) - for byteIndex >= 0 { - mask := uint8(0b11111) - - // We have all the bits we need - if bitsRemaining > bitsNeeded { - // Just grab what we need and shift down - bitsRemaining -= 5 - newInt := currentByte & mask - currentByte = currentByte >> 5 - intList = append(intList, newInt) - } else { - // Take our remaining bits and them fill in the rest from the next byte - bitsNeeded -= bitsRemaining - oldB := currentByte - byteIndex-- - - // Grab the next byte and shift it upwards - tempB := byte(0) - if byteIndex >= 0 { - tempB = uint8(bytes[byteIndex]) - } - tempB = tempB << bitsRemaining - - // Merge its bits with our remaining bits. - merged := tempB | oldB - newInt := merged & mask - intList = append(intList, newInt) - - // Finally grab the next byte and shift it downwards to - // discard the bits we already used. 
- if byteIndex >= 0 { - currentByte = uint8(bytes[byteIndex]) - currentByte = currentByte >> bitsNeeded - } - - // Update our tracking values - bitsRemaining = 8 - bitsNeeded - bitsNeeded = 5 - } - } - - slices.Reverse(intList) - - // Encode those ints into strings 5-bits at a time. - output := make([]rune, 0, len(intList)) - for _, i := range intList { - lookup := i & 0b11111 - output = append(output, crockfordEncodeMap[lookup]) - } - - return string(output) -} diff --git a/ulid.go b/ulid.go new file mode 100644 index 0000000..59f6522 --- /dev/null +++ b/ulid.go @@ -0,0 +1,89 @@ +package ulid + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "time" + + "git.wisellama.rocks/Wisellama/ulid/crockford32" +) + +// The ULID spec is mirrored here: +// https://git.wisellama.rocks/Mirrors/ulid-spec + +// NewULIDString creates a new ULID and returns its encoded string. +// See NewULID for more details. +func NewULIDString(t time.Time, entropy io.Reader) (string, error) { + bytes, err := NewULID(t, entropy) + if err != nil { + return "", err + } + + s := crockford32.CrockfordEncode(bytes) + return s, nil +} + +// NewULID creates a new ULID. +// +// A ULID is a 128-bit (16-byte) value similar to a UUID (and +// compatible with UUIDs because of this). The first 48-bits (6 bytes) +// are based on a timestamp. The remaining 80-bits (10 bytes) are +// random. I'm not implementing the monotonicity part of the ULID spec +// because I don't need it. Any ULIDs created during the same +// millisecond will just receive random values with no ordering +// guarantee. 
+func NewULID(t time.Time, entropy io.Reader) ([]byte, error) { + if entropy == nil { + return nil, errors.New("entropy was nil") + } + + randomBytes := make([]byte, 10) + _, err := entropy.Read(randomBytes) + if err != nil { + return nil, fmt.Errorf("failed to read bytes from entropy source: %w", err) + } + + msBytes, err := GetMSBytes(t) + if err != nil { + return nil, err + } + if len(msBytes) != 6 { + return nil, errors.New("timestamp bytes are wrong") + } + if len(randomBytes) != 10 { + return nil, errors.New("random bytes are wrong") + } + + ulidBytes := make([]byte, 0, 16) + for _, b := range msBytes { + ulidBytes = append(ulidBytes, b) + } + for _, b := range randomBytes { + ulidBytes = append(ulidBytes, b) + } + + return ulidBytes, nil +} + +// GetMSBytes returns the given Unix time in milliseconds as a 6-byte +// array. It truncates the 64-bit Unix epoch time down to 48-bits (6 +// bytes) and returns that 6 byte array. According to the ULID spec, +// 48-bits is enough room that we won't run out of space until 10889 +// AD. +func GetMSBytes(t time.Time) ([]byte, error) { + ms := uint64(t.UnixMilli()) + + // Put the 64-bit int into a byte array + bytes := make([]byte, 8) + binary.BigEndian.PutUint64(bytes, ms) + + if bytes[0] != 0 || bytes[1] != 0 { + return nil, errors.New("time overflow") + } + + // Chop off the first 2 bytes (16 bits) to get the 6 byte (48-bit) + // output. + return bytes[2:], nil +} diff --git a/ulid_test.go b/ulid_test.go index 7a571f9..e4b81f2 100644 --- a/ulid_test.go +++ b/ulid_test.go @@ -1,4 +1,4 @@ -package main +package ulid import ( "fmt"