Skip to content

Commit f41ad96

Browse files
Add run-length-encoding exercise (#220)
1 parent ba08f3b commit f41ad96

File tree

12 files changed

+3735
-0
lines changed

12 files changed

+3735
-0
lines changed

config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,14 @@
342342
"strings"
343343
]
344344
},
345+
{
346+
"slug": "run-length-encoding",
347+
"name": "Run Length Encoding",
348+
"uuid": "06de39e4-dddc-4375-921c-9b3862fd58ee",
349+
"practices": [],
350+
"prerequisites": [],
351+
"difficulty": 5
352+
},
345353
{
346354
"slug": "acronym",
347355
"name": "Acronym",
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Instructions
2+
3+
Implement run-length encoding and decoding.
4+
5+
Run-length encoding (RLE) is a simple form of data compression in which runs (sequences of consecutive identical data elements) are replaced by a single data value and its count.
6+
7+
For example, we can represent the original 53 characters with only 13.
8+
9+
```text
10+
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB" -> "12WB12W3B24WB"
11+
```
12+
13+
RLE allows the original data to be perfectly reconstructed from the compressed data, which makes it a lossless form of data compression.
14+
15+
```text
16+
"AABCCCDEEEE" -> "2AB3CD4E" -> "AABCCCDEEEE"
17+
```
18+
19+
For simplicity, you can assume that the unencoded string will only contain the letters A through Z (either lower or upper case) and whitespace.
20+
This way, data to be encoded will never contain any digits, and digits inside data to be decoded always represent the count for the following character.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.o
2+
tests
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"authors": [
3+
"keiravillekode"
4+
],
5+
"files": {
6+
"solution": [
7+
"run_length_encoding.asm"
8+
],
9+
"test": [
10+
"run_length_encoding_test.c"
11+
],
12+
"example": [
13+
".meta/example.asm"
14+
]
15+
},
16+
"blurb": "Implement run-length encoding and decoding.",
17+
"source": "Wikipedia",
18+
"source_url": "https://en.wikipedia.org/wiki/Run-length_encoding"
19+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
section .text
2+
global encode, decode
3+
4+
5+
; encode - run-length encode a null-terminated string.
;
; A run of two or more identical bytes is written as its decimal length
; followed by the byte; a run of length one is written as the byte alone.
; The output is null-terminated by the shared `done` exit.
;
; rdi - buffer (output pointer, advanced past every byte written)
6+
; rsi - string (input pointer, advanced past every byte read)
7+
;
8+
; r8b - previous byte (the byte of the run being measured)
9+
; r9b - current byte (first byte that differs from the run)
10+
; r10 - 10 (divisor used to split the run length into decimal digits)
11+
; r11 - input pointer at byte after start of run; reused as the front cursor when reversing digits
; rax - run length, then quotient while digits are produced; al is a temporary in .reverse
; rcx - back cursor when reversing digits
; rdx - remainder of the division; dl is a temporary in .reverse
12+
encode:
13+
mov r8b, byte [rsi] ; read first byte from input string
14+
inc rsi
15+
mov r11, rsi ; r11 = one past the start of the first run
16+
mov r10, 10
17+
test r8b, r8b ; check if we have reached null terminator
18+
jz done ; empty input: just write the terminator
19+
20+
.encode_in:
21+
mov r9b, byte [rsi] ; read next byte from input string
22+
inc rsi
23+
cmp r9b, r8b
24+
jz .encode_in ; repeat while run continues
25+
26+
mov rax, rsi
27+
sub rax, r11 ; length of run (both pointers are one past their byte)
28+
cmp rax, 1
29+
jz .encode_out ; a single byte is written without a count
30+
31+
mov r11, rdi ; address where first digit will be written
32+
33+
; Digits are produced least-significant first, then reversed in place below.
.digit_out:
34+
xor rdx, rdx ; div uses rdx:rax as the dividend, so clear the high half
35+
div r10 ; divide by 10
36+
add dl, '0' ; convert remainder from number 0..9 to ASCII digit '0'..'9'
37+
mov byte [rdi], dl ; write digit to output string
38+
inc rdi
39+
test rax, rax ; check if quotient is 0
40+
jnz .digit_out
41+
42+
mov rcx, rdi
43+
dec rcx ; address where last digit was written
44+
45+
; Swap bytes at r11 (front) and rcx (back), moving the cursors toward each other.
.reverse:
46+
mov al, byte [r11]
47+
mov dl, byte [rcx]
48+
mov byte [rcx], al
49+
mov byte [r11], dl
50+
inc r11
51+
dec rcx
52+
cmp r11, rcx
53+
jb .reverse ; unsigned compare: r11 and rcx are addresses
54+
55+
.encode_out:
56+
mov byte [rdi], r8b ; write previous byte to output string
57+
inc rdi
58+
test r9b, r9b ; check if we have reached null terminator
59+
jz done
60+
61+
mov r8b, r9b ; the differing byte starts the next run
62+
mov r11, rsi ; start a new run
63+
jmp .encode_in
64+
65+
66+
; Shared by encode and decode
; Common exit path: null-terminates the output at rdi and returns to the caller.
67+
done:
68+
mov byte [rdi], 0 ; write null terminator at the current output position
69+
ret
70+
71+
72+
; decode - expand a null-terminated run-length encoded string.
;
; Decimal digits accumulate a repeat count in rax; the next non-digit byte
; is written that many times (once when no count precedes it).
; The output is null-terminated by the shared `done` exit.
;
; rdi - buffer (output pointer, advanced past every byte written)
73+
; rsi - string (input pointer, advanced past every byte read)
74+
;
75+
; rax - count
76+
; r8b - byte
77+
; r10 - 10
78+
; r11 - digit value (only r11b is written after the initial clear)
; rdx - overwritten by mul (high half of the product)
79+
decode:
80+
xor r11, r11 ; clear digit value
81+
xor rax, rax ; clear count
82+
mov r10, 10
83+
jmp .decode_in
84+
85+
; If rax is 0 or 1, the byte will be output once.
86+
.decode_out:
87+
mov byte [rdi], r8b ; write byte to output buffer
88+
inc rdi
89+
dec rax
90+
jg .decode_out ; repeat rax times (signed test, so a count of 0 falls through after one write)
91+
92+
xor rax, rax ; reset count for the next run
93+
94+
.decode_in:
95+
mov r8b, byte [rsi] ; read byte from input string
96+
inc rsi
97+
98+
cmp r8b, '9'
99+
ja .decode_out ; output character that is not a digit or null
100+
101+
test r8b, r8b ; check if we have reached null terminator
102+
jz done
103+
104+
mov r11b, r8b
105+
sub r11b, '0' ; r11b = digit value; negative for bytes below '0'
106+
jl .decode_out ; output character if it is not a digit
107+
108+
mul r10 ; multiply count by 10
109+
add rax, r11 ; increase count by current digit
110+
jmp .decode_in
111+
112+
113+
%ifidn __OUTPUT_FORMAT__,elf64
114+
section .note.GNU-stack noalloc noexec nowrite progbits
115+
%endif
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# This is an auto-generated file.
2+
#
3+
# Regenerating this file via `configlet sync` will:
4+
# - Recreate every `description` key/value pair
5+
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
6+
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
7+
# - Preserve any other key/value pair
8+
#
9+
# As user-added comments (using the # character) will be removed when this file
10+
# is regenerated, comments can be added via a `comment` key.
11+
12+
[ad53b61b-6ffc-422f-81a6-61f7df92a231]
13+
description = "run-length encode a string -> empty string"
14+
15+
[52012823-b7e6-4277-893c-5b96d42f82de]
16+
description = "run-length encode a string -> single characters only are encoded without count"
17+
18+
[b7868492-7e3a-415f-8da3-d88f51f80409]
19+
description = "run-length encode a string -> string with no single characters"
20+
21+
[859b822b-6e9f-44d6-9c46-6091ee6ae358]
22+
description = "run-length encode a string -> single characters mixed with repeated characters"
23+
24+
[1b34de62-e152-47be-bc88-469746df63b3]
25+
description = "run-length encode a string -> multiple whitespace mixed in string"
26+
27+
[abf176e2-3fbd-40ad-bb2f-2dd6d4df721a]
28+
description = "run-length encode a string -> lowercase characters"
29+
30+
[7ec5c390-f03c-4acf-ac29-5f65861cdeb5]
31+
description = "run-length decode a string -> empty string"
32+
33+
[ad23f455-1ac2-4b0e-87d0-b85b10696098]
34+
description = "run-length decode a string -> single characters only"
35+
36+
[21e37583-5a20-4a0e-826c-3dee2c375f54]
37+
description = "run-length decode a string -> string with no single characters"
38+
39+
[1389ad09-c3a8-4813-9324-99363fba429c]
40+
description = "run-length decode a string -> single characters with repeated characters"
41+
42+
[3f8e3c51-6aca-4670-b86c-a213bf4706b0]
43+
description = "run-length decode a string -> multiple whitespace mixed in string"
44+
45+
[29f721de-9aad-435f-ba37-7662df4fb551]
46+
description = "run-length decode a string -> lowercase string"
47+
48+
[2a762efd-8695-4e04-b0d6-9736899fbc16]
49+
description = "encode and then decode -> encode followed by decode gives original string"
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Assembler used for the .asm sources.
AS = nasm
2+
3+
# User-overridable flags; they are appended to the platform-specific ALL_* flags below.
CFLAGS = -g -Wall -Wextra -pedantic -Werror
4+
LDFLAGS =
5+
ASFLAGS = -g -F dwarf -Werror
6+
7+
# Platform selection: Mach-O output on Darwin, ELF64 otherwise.
ifeq ($(shell uname -s),Darwin)
8+
# When sysctl reports arm64 support, compile the C code for x86_64 to match the assembly.
ifeq ($(shell sysctl -n hw.optional.arm64 2>/dev/null),1)
9+
ALL_CFLAGS = -target x86_64-apple-darwin
10+
endif
11+
ALL_LDFLAGS = -Wl,-pie -Wl,-fatal_warnings
12+
# --prefix _ gives the assembly symbols the leading underscore used on this platform.
ALL_ASFLAGS = -f macho64 --prefix _
13+
else
14+
ALL_LDFLAGS = -pie -Wl,--fatal-warnings
15+
ALL_ASFLAGS = -f elf64
16+
endif
17+
18+
ALL_CFLAGS += -std=c99 -fPIE -m64 $(CFLAGS)
19+
ALL_LDFLAGS += $(LDFLAGS)
20+
ALL_ASFLAGS += $(ASFLAGS)
21+
22+
# Objects: one per .c and .asm file in this directory, plus vendor/unity.o, minus example.o.
C_OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
23+
AS_OBJS = $(patsubst %.asm,%.o,$(wildcard *.asm))
24+
ALL_OBJS = $(filter-out example.o,$(C_OBJS) $(AS_OBJS) vendor/unity.o)
25+
26+
# Shared compile command for the C rules below.
CC_CMD = $(CC) $(ALL_CFLAGS) -c -o $@ $<
27+
28+
# Default target: build the tests binary, then run it.
all: tests
29+
@./$<
30+
31+
# Link every object into the tests executable.
tests: $(ALL_OBJS)
32+
@$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) -o $@ $(ALL_OBJS)
33+
34+
# Assemble a .asm source into an object file.
%.o: %.asm
35+
@$(AS) $(ALL_ASFLAGS) -o $@ $<
36+
37+
# Compile a .c source into an object file.
%.o: %.c
38+
@$(CC_CMD)
39+
40+
# vendor/unity.o is rebuilt when its source or either header changes.
vendor/unity.o: vendor/unity.c vendor/unity.h vendor/unity_internals.h
41+
@$(CC_CMD)
42+
43+
# Remove object files and the tests binary.
clean:
44+
@rm -f *.o vendor/*.o tests
45+
46+
.PHONY: all clean
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
section .text
2+
global encode, decode
3+
4+
; encode - exercise stub; currently returns immediately without writing any output.
; NOTE(review): the example solution takes rdi = output buffer and rsi = input string,
; and null-terminates the output — confirm against run_length_encoding_test.c.
encode:
5+
; Provide your implementation here
6+
ret
7+
8+
; decode - exercise stub; currently returns immediately without writing any output.
; NOTE(review): the example solution takes rdi = output buffer and rsi = input string,
; and null-terminates the output — confirm against run_length_encoding_test.c.
decode:
9+
; Provide your implementation here
10+
ret
11+
12+
%ifidn __OUTPUT_FORMAT__,elf64
13+
section .note.GNU-stack noalloc noexec nowrite progbits
14+
%endif

0 commit comments

Comments
 (0)