From 094bac9c1077f4aa43cf06d982c2f75afd6870ad Mon Sep 17 00:00:00 2001 From: Wisellama Date: Sat, 17 Feb 2024 23:47:18 -0800 Subject: [PATCH] tests work --- crockford_test.go | 62 +++++++++++-- main.go | 206 +++++++++++++++++++++++++++++-------------- ulid_test.go | 218 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 418 insertions(+), 68 deletions(-) create mode 100644 ulid_test.go diff --git a/crockford_test.go b/crockford_test.go index f2b4e87..0e2a6df 100644 --- a/crockford_test.go +++ b/crockford_test.go @@ -5,11 +5,63 @@ import ( ) func TestCrockfordEncode(t *testing.T) { - input := []byte{1, 141, 178, 57, 150, 88, 1, 148, 253, 194, 250, 47, 252, 192, 65, 211} + runAll := true - expected := "01HPS3K5JR06AFVGQT5ZYC0GEK" - output := CrockfordEncode(input) - if expected != output { - t.Errorf("expected %v, got %v", expected, output) + type testData struct { + TestName string + RunIt bool + Input []byte + Expected string + } + + tests := []testData{ + { + TestName: "nil", + RunIt: false || runAll, + Input: nil, + Expected: "", + }, + { + TestName: "1 byte", + RunIt: false || runAll, + Input: []byte{0b11111}, + Expected: "0Z", + }, + { + TestName: "2 bytes", + RunIt: false || runAll, + Input: []byte{0b00000011, 0b11111111}, + Expected: "00ZZ", + }, + { + TestName: "valid ulid", + RunIt: false || runAll, + Input: []byte{1, 141, 178, 57, 150, 88, 1, 148, 253, 194, 250, 47, 252, 192, 65, 211}, + Expected: "01HPS3K5JR06AFVGQT5ZYC0GEK", + }, + { + TestName: "quick brown fox", + RunIt: false || runAll, + Input: []byte("The quick brown fox jumps over the lazy dog."), + Expected: "1A6GS90E5TPJRVB41H74VVQDRG6CVVR41N7AVBGECG6YXK5E8G78T3541P62YKS41J6YSSE", + }, + { + TestName: "max ULID", + RunIt: false || runAll, + Input: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, + Expected: "7ZZZZZZZZZZZZZZZZZZZZZZZZZ", + }, + } + + for _, test := range tests { + t.Run(test.TestName, func(t *testing.T) { + if !test.RunIt { + t.SkipNow() + } + output := CrockfordEncode(test.Input) + if test.Expected != output { + t.Errorf("expected %v, got %v", test.Expected, output) + } + }) } } diff --git a/main.go b/main.go index 50a1ae3..3ae118f 100644 --- a/main.go +++ b/main.go @@ -2,8 +2,12 @@ package main import ( "encoding/binary" + "errors" + "fmt" + "io" "log" "math/rand" + "slices" "time" "github.com/oklog/ulid" @@ -14,8 +18,12 @@ import ( func main() { t := time.Date(2024, 02, 16, 14, 02, 15, 17, time.UTC) + t = time.Unix(0, 0) ms := uint64(t.UnixMilli()) - msBytes := GetMSBytes(t) + msBytes, err := GetMSBytes(t) + if err != nil { + log.Fatal(err) + } log.Printf("%X%X%X%X%X%X", msBytes[0], msBytes[1], msBytes[2], msBytes[3], msBytes[4], msBytes[5]) @@ -27,31 +35,66 @@ func main() { log.Fatal(err) } - log.Printf("%v", u) + ub, _ := u.MarshalBinary() + log.Printf("%X", ub) + log.Printf("%v", u.String()) log.Printf("%016X", u.Time()) log.Printf("%016X", ms) - log.Printf("%016X", ms) - - // ULID is a 128-bit (16-byte) value similar to a UUID. - // The first 48-bits (6 bytes) are based on a timestamp. - // The remaining 80-bits (10 bytes) are random. - // I'm not implementing the monotonicity part of the ULID spec - // because I don't need it. - entropy = rand.New(rand.NewSource(seed)) - randomBytes := make([]byte, 10) - _, err = entropy.Read(randomBytes) + ulidBytes, err := NewULID(t, entropy) if err != nil { - log.Fatalf("failed to read bytes from entropy source") + log.Fatal(err) + } + log.Printf("%X", ulidBytes) + + ulidString := CrockfordEncode(ulidBytes) + log.Printf("ULID string: %v", ulidString) + +} + +// NewULIDString create a new ULID and returns its encoded string. +// See NewULID for more details. +func NewULIDString(t time.Time, entropy io.Reader) (string, error) { + bytes, err := NewULID(t, entropy) + if err != nil { + return "", err } + s := CrockfordEncode(bytes) + return s, nil +} + +// NewULID creates a new ULID. +// +// A ULID is a 128-bit (16-byte) value similar to a UUID (and +// compatible with UUIDs because of this). The first 48-bits (6 bytes) +// are based on a timestamp. The remaining 80-bits (10 bytes) are +// random. I'm not implementing the monotonicity part of the ULID spec +// because I don't need it. Any ULIDs created during the same +// millisecond will just receive random values with no ordering +// guarantee. +func NewULID(t time.Time, entropy io.Reader) ([]byte, error) { + if entropy == nil { + return nil, errors.New("entropy was nil") + } + + randomBytes := make([]byte, 10) + _, err := entropy.Read(randomBytes) + if err != nil { + return nil, fmt.Errorf("failed to read bytes from entropy source: %w", err) + } + + msBytes, err := GetMSBytes(t) + if err != nil { + return nil, err + } if len(msBytes) != 6 { - log.Fatalf("timestamp bytes are wrong") + return nil, errors.New("timestamp bytes are wrong") } if len(randomBytes) != 10 { - log.Fatalf("random bytes are wrong") + return nil, errors.New("random bytes are wrong") } ulidBytes := make([]byte, 0, 16) @@ -62,9 +105,7 @@ func main() { ulidBytes = append(ulidBytes, b) } - ulidString := CrockfordEncode(ulidBytes) - log.Printf("ULID string: %v", ulidString) - + return ulidBytes, nil } // GetMSBytes returns the given Unix time in milliseconds as a 6-byte @@ -72,20 +113,24 @@ func main() { // bytes) and returns that 6 byte array. According to the ULID spec, // 48-bits is enough room that we won't run out of space until 10889 // AD. -func GetMSBytes(t time.Time) []byte { +func GetMSBytes(t time.Time) ([]byte, error) { ms := uint64(t.UnixMilli()) // Put the 64-bit int into a byte array bytes := make([]byte, 8) binary.BigEndian.PutUint64(bytes, ms) + if bytes[0] != 0 || bytes[1] != 0 { + return nil, errors.New("time overflow") + } + // Chop off the first 2 bytes (16 bits) to get the 6 byte (48-bit) // output. - return bytes[2:] + return bytes[2:], nil } var ( - crockfordEncodeMap = map[uint64]rune{ + crockfordEncodeMap = map[uint8]rune{ 0: '0', 1: '1', 2: '2', @@ -120,7 +165,7 @@ var ( 31: 'Z', } - crockfordDecodeMap = map[rune]uint64{ + crockfordDecodeMap = map[rune]uint32{ '0': 0, 'O': 0, 'o': 0, @@ -131,62 +176,97 @@ var ( ) // CrockfordEncode takes a byte array and encodes every 5-bits as a -// character string according to Crockford's base 32 encoding. +// character string according to Crockford's base 32 encoding. This +// specific implementation uses Big Endian byte order to fit the ULID +// spec ("network byte ordering") // // https://www.crockford.com/base32.html func CrockfordEncode(bytes []byte) string { // Crockford is a base 32 encoding. // 2^5 = 32, so every 5 bits will give us a character. - // Each byte is 8 bits, so we'll have to smoosh a few bytes together. - // For ULIDs, we have 128 bits which doesn't evenly divide by 5. // + // Each byte is 8 bits, so we'll have to smoosh bytes together to + // get values divisible by 5. Any remainder will be padded with + // zeros. + // + // For ULIDs, we have 128 bits which doesn't evenly divide by 5. // Technically we'll be encoding 130 bits of information // (divisible by 5), but the timestamp will should always start - // with zeros. - if len(bytes) < 16 { - log.Printf("failed to encode, expected a 16 byte ULID") + // with some zero padding. + // + // According to the spec, this is why the maximum ULID value is + // `7ZZZZZZZZZZZZZZZZZZZZZZZZZ` instead of all Z's. + // The largest supported timestamp is 2^48 - 1. + + if len(bytes) == 0 { return "" } - log.Printf("bytes: %v", bytes) + // Split our byte array up into 5-bit sections and determine how + // much of a remainder we have. + splitSize := len(bytes) * 8 / 5 + remainder := len(bytes) * 8 % 5 - // Split our bytes up into groups 40 bits each = 120 out of our - // 130 bits. Put these into byte arrays that are 8 bytes long so - // that we can convert them into uint64s. - last := append([]byte{0, 0, 0}, bytes[11:]...) // 11 12 13 14 15 - log.Printf("last: %b", last) - third := append([]byte{0, 0, 0}, bytes[6:11]...) // 6 7 8 9 10 - second := append([]byte{0, 0, 0}, bytes[1:7]...) // 1 2 3 4 5 - // Plus the last 8 bits and 2 padding zeros to give us the remaining 10. - first := append([]byte{0, 0, 0, 0, 0, 0, 0}, bytes[0:1]...) // 0 + // Then determine how many uint8's we need to represent these + // bits. + numInts := splitSize + if remainder > 0 { + numInts += 1 + } + intList := make([]uint8, 0, numInts) - // Convert each of those into integers so we have all the bits in one place. - lastInt := binary.BigEndian.Uint64(last) - thirdInt := binary.BigEndian.Uint64(third) - secondInt := binary.BigEndian.Uint64(second) - firstInt := binary.BigEndian.Uint64(first) + // Go right to left across the bits grabbing each 5-bit chunk + byteIndex := len(bytes) - 1 + bitsRemaining := 8 + bitsNeeded := 5 + currentByte := uint8(bytes[byteIndex]) + for byteIndex >= 0 { + mask := uint8(0b11111) + + // We have all the bits we need + if bitsRemaining > bitsNeeded { + // Just grab what we need and shift down + bitsRemaining -= 5 + newInt := currentByte & mask + currentByte = currentByte >> 5 + intList = append(intList, newInt) + } else { + // Take our remaining bits and them fill in the rest from the next byte + bitsNeeded -= bitsRemaining + oldB := currentByte + byteIndex-- + + // Grab the next byte and shift it upwards + tempB := byte(0) + if byteIndex >= 0 { + tempB = uint8(bytes[byteIndex]) + } + tempB = tempB << bitsRemaining + + // Merge its bits with our remaining bits. + merged := tempB | oldB + newInt := merged & mask + intList = append(intList, newInt) + + // Finally grab the next byte and shift it downwards to + // discard the bits we already used. + if byteIndex >= 0 { + currentByte = uint8(bytes[byteIndex]) + currentByte = currentByte >> bitsNeeded + } + + // Update our tracking values + bitsRemaining = 8 - bitsNeeded + bitsNeeded = 5 + } + } + + slices.Reverse(intList) // Encode those ints into strings 5-bits at a time. - output := make([]rune, 0, 26) - shiftedInt := uint64(0) - for i := 1; i >= 0; i-- { - shiftedInt = firstInt >> (i * 5) - lookup := shiftedInt & 0b11111 - output = append(output, crockfordEncodeMap[lookup]) - } - for i := 7; i >= 0; i-- { - shiftedInt = secondInt >> (i * 5) - lookup := shiftedInt & 0b11111 - output = append(output, crockfordEncodeMap[lookup]) - } - for i := 7; i >= 0; i-- { - shiftedInt = thirdInt >> (i * 5) - lookup := shiftedInt & 0b11111 - output = append(output, crockfordEncodeMap[lookup]) - } - for i := 7; i >= 0; i-- { - shiftedInt = lastInt >> (i * 5) - lookup := shiftedInt & 0b11111 + output := make([]rune, 0, len(intList)) + for _, i := range intList { + lookup := i & 0b11111 output = append(output, crockfordEncodeMap[lookup]) } diff --git a/ulid_test.go b/ulid_test.go new file mode 100644 index 0000000..7a571f9 --- /dev/null +++ b/ulid_test.go @@ -0,0 +1,218 @@ +package main + +import ( + "fmt" + "io" + "log" + "math/rand" + "testing" + "time" +) + +func TestGetMSBytes(t *testing.T) { + runAll := true + + type testData struct { + TestName string + RunIt bool + Time time.Time + Expected []byte + Err bool + } + + tests := []testData{ + { + TestName: "normal time", + RunIt: false || runAll, + Time: time.Date(2024, 02, 16, 14, 02, 15, 17, time.UTC), + Expected: []byte{0x01, 0x8D, 0xB2, 0x39, 0x96, 0x58}, + Err: false, + }, + { + TestName: "zero time", + RunIt: false || runAll, + Time: time.Time{}, // zero time overflows when using Unix epoch + Expected: []byte{}, + Err: true, + }, + { + TestName: "max time", + RunIt: false || runAll, + Time: time.UnixMilli(1 << 48), + Expected: []byte{}, + Err: true, + }, + { + TestName: "max time minus 1", + RunIt: false || runAll, + Time: time.UnixMilli(int64(1<<48) - 1), + Expected: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, + Err: false, + }, + } + + for _, test := range tests { + t.Run(test.TestName, func(t *testing.T) { + if !test.RunIt { + t.SkipNow() + } + + output, err := GetMSBytes(test.Time) + if test.Err { + if err == nil { + t.Errorf("expected an error") + } + } else { + errMsg := fmt.Sprintf("expected: %X, received %X", test.Expected, output) + if len(test.Expected) != len(output) { + t.Fatal(errMsg) + } + for i, b := range test.Expected { + if b != output[i] { + t.Error(errMsg) + } + } + } + }) + } +} + +func TestNewULID(t *testing.T) { + runAll := true + + type testData struct { + TestName string + RunIt bool + Time time.Time + Entropy io.Reader + Expected []byte + Err bool + } + + tests := []testData{ + { + TestName: "nil entropy", + RunIt: false || runAll, + Time: time.Time{}, + Entropy: nil, + Expected: nil, + Err: true, + }, + { + TestName: "Unix zero time", + RunIt: false || runAll, + Time: time.Unix(0, 0), + Entropy: rand.New(rand.NewSource(0)), + Expected: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x94, 0xFD, 0xC2, 0xFA, 0x2F, 0xFC, 0xC0, 0x41, 0xD3}, + Err: false, + }, + { + TestName: "time overflow", + RunIt: false || runAll, + Time: time.Time{}, // zero time overflows when using Unix epoch time + Entropy: rand.New(rand.NewSource(0)), + Expected: nil, + Err: true, + }, + { + TestName: "seed 0, real time", + RunIt: false || runAll, + Time: time.Date(2024, 02, 16, 14, 02, 15, 17, time.UTC), + Entropy: rand.New(rand.NewSource(0)), + Expected: []byte{0x01, 0x8D, 0xB2, 0x39, 0x96, 0x58, 0x01, 0x94, 0xFD, 0xC2, 0xFA, 0x2F, 0xFC, 0xC0, 0x41, 0xD3}, + Err: false, + }, + } + + for _, test := range tests { + t.Run(test.TestName, func(t *testing.T) { + if !test.RunIt { + t.SkipNow() + } + + output, err := NewULID(test.Time, test.Entropy) + if test.Err { + if err == nil { + t.Errorf("expected an error") + } + } else { + errMsg := fmt.Sprintf("expected: %X, received %X", test.Expected, output) + if len(test.Expected) != len(output) { + t.Fatal(errMsg) + } + for i, b := range test.Expected { + if b != output[i] { + t.Fatal(errMsg) + } + } + } + }) + } +} + +func TestULIDString(t *testing.T) { + runAll := true + + type testData struct { + TestName string + RunIt bool + Time time.Time + Entropy io.Reader + Expected string + Err bool + } + + tests := []testData{ + { + TestName: "nil entropy", + RunIt: false || runAll, + Time: time.Time{}, + Entropy: nil, + Expected: "", + Err: true, + }, + { + TestName: "Unix zero time", + RunIt: false || runAll, + Time: time.Unix(0, 0), + Entropy: rand.New(rand.NewSource(0)), + Expected: "000000000006AFVGQT5ZYC0GEK", + Err: false, + }, + { + TestName: "time overflow", + RunIt: false || runAll, + Time: time.Time{}, // zero time overflows when using Unix epoch time + Entropy: rand.New(rand.NewSource(0)), + Expected: "", + Err: true, + }, + { + TestName: "seed 0, real time", + RunIt: false || runAll, + Time: time.Date(2024, 02, 16, 14, 02, 15, 17, time.UTC), + Entropy: rand.New(rand.NewSource(0)), + Expected: "01HPS3K5JR06AFVGQT5ZYC0GEK", + Err: false, + }, + } + + for _, test := range tests { + t.Run(test.TestName, func(t *testing.T) { + if !test.RunIt { + t.SkipNow() + } + + output, err := NewULIDString(test.Time, test.Entropy) + if test.Err { + if err == nil { + t.Errorf("expected an error") + } + } else { + if test.Expected != output { + log.Fatalf("expected: %s, received %s", test.Expected, output) + } + } + }) + } +}