File tree Expand file tree Collapse file tree 5 files changed +52
-12
lines changed Expand file tree Collapse file tree 5 files changed +52
-12
lines changed Original file line number Diff line number Diff line change 1+ language : julia
2+
3+ os :
4+ - linux
5+
6+ julia :
7+ - 0.7
8+ - 1.0
9+ - 1.1
10+ - nightly
11+
12+ matrix :
13+ allow_failures :
14+ - julia : nightly
15+
16+
17+ branch :
18+ only :
19+ - master
20+
21+ after_success :
22+ - julia -e 'using Pkg; cd(Pkg.dir("BytePairEncoding")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
23+
24+ notifications :
25+ email : false
26+
27+ git :
28+ depth : 99999999
Original file line number Diff line number Diff line change 1+ # This file is machine-generated - editing it directly is not advised
2+
13[[Base64 ]]
24uuid = " 2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
35
Original file line number Diff line number Diff line change @@ -6,5 +6,13 @@ version = "0.1.0"
66[deps ]
77DelimitedFiles = " 8bb1440f-4735-579b-a4ab-409b98df4dab"
88InternedStrings = " 7d512f48-7fb1-5a58-b986-67e6dc259f01"
9- Test = " 8dfed614-e22c-5e08-85e1-65c5234f0b40"
109WordTokenizers = " 796a5d58-b03d-544a-977e-18100b691f6e"
10+
11+ [compat ]
12+ julia = " 0.7, 1"
13+
14+ [extras ]
15+ Test = " 8dfed614-e22c-5e08-85e1-65c5234f0b40"
16+
17+ [targets ]
18+ test = [" Test" ]
Original file line number Diff line number Diff line change 1-
2- # Table of Contents
3-
4- 1 . [ BytePairEncoding.jl] ( #orga6ba865 )
5- 2 . [ API] ( #org3bc249a )
6- 1 . [ Unicode Normalization] ( #orgf84a864 )
7- 3 . [ Examples] ( #orga7c7a03 )
8- 4 . [ Roadmap] ( #orgecfe740 )
9-
10-
111<a id =" orga6ba865 " ></a >
122
133# BytePairEncoding.jl
144
5+ [ ![ Build Status] ( https://travis-ci.com/chengchingwen/BytePairEncoding.jl.svg?branch=master )] ( https://travis-ci.com/chengchingwen/BytePairEncoding.jl )
6+ [ ![ codecov] ( https://codecov.io/gh/chengchingwen/BytePairEncoding.jl/branch/master/graph/badge.svg )] ( https://codecov.io/gh/chengchingwen/BytePairEncoding.jl )
7+
158Pure Julia implementation of the Byte Pair Encoding (BPE) method
169described in the [ subword neural machine translation paper] ( https://arxiv.org/abs/1508.07909 ) . It is a port of
1710the original Python package [ subword-nmt] ( https://github.com/rsennrich/subword-nmt ) . ` BytePairEncoding.jl ` supports different tokenization
1811methods (with the help of WordTokenizers.jl). You can simply call ` set_tokenizer([your tokenize function]) `
1912and then learn the BPE map with it.
2013
2114
15+ # Table of Contents
16+
17+ 1 . [ BytePairEncoding.jl] ( #orga6ba865 )
18+ 2 . [ API] ( #org3bc249a )
19+ 1 . [ Unicode Normalization] ( #orgf84a864 )
20+ 3 . [ Examples] ( #orga7c7a03 )
21+ 4 . [ Roadmap] ( #orgecfe740 )
22+
23+
2224<a id =" org3bc249a " ></a >
2325
2426# API
Original file line number Diff line number Diff line change @@ -22,7 +22,7 @@ tests = [
2222@testset " BytePairEncoding" begin
2323 for t in tests
2424 fp = joinpath (dirname (@__FILE__ ), " test_$t .jl" )
25- println ( " $fp ... " )
25+ @info " Test $t "
2626 include (fp)
2727 end
2828end
You can’t perform that action at this time.
0 commit comments