Skip to content

Commit d29ae71

Browse files
Make library generally usable by external programs (not just Contour).
1 parent bee6a9b commit d29ae71

15 files changed

+124
-106
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- CMake install target also installs header and library (not just tools).
99
- Reduce number of dependencies down to fmtlib and (for unit tests) Catch2.
1010
- Enables libunicode to be found via CMake's `find_package()`.
11+
- Improved default installation directories on UNIX via GNUInstallDirs helper.
1112

1213
## 0.1.0 (2022-11-03)
1314

src/tools/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ if(LIBUNICODE_TOOLS)
77
# install(TARGETS uc-inspect DESTINATION bin)
88

99
add_executable(unicode-query unicode-query.cpp)
10-
target_link_libraries(unicode-query fmt::fmt-header-only unicode)
10+
target_link_libraries(unicode-query unicode)
1111
install(TARGETS unicode-query DESTINATION bin)
1212
endif()

src/tools/unicode-query.cpp

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@
1717
#include <unicode/grapheme_segmenter.h>
1818
#include <unicode/ucd.h>
1919
#include <unicode/ucd_enums.h>
20-
#include <unicode/ucd_fmt.h>
20+
#include <unicode/ucd_ostream.h>
2121
#include <unicode/utf8_grapheme_segmenter.h>
2222

23-
#include <fmt/format.h>
24-
2523
#include <cassert>
2624
#include <charconv>
25+
#include <iomanip>
2726
#include <iostream>
2827
#include <optional>
28+
#include <sstream>
2929
#include <string>
3030

3131
using namespace std;
@@ -35,16 +35,17 @@ namespace
3535

3636
std::string quotedAndEscaped(std::string const& text)
3737
{
38-
auto result = "\""s;
38+
auto result = stringstream {};
39+
result << '"';
3940
for (char const ch: text)
4041
{
4142
if (std::isprint(ch) && ch != '"')
42-
result += ch;
43+
result << ch;
4344
else
44-
result += fmt::format("\\x{:02X}", uint8_t(ch));
45+
result << "\\x" << setw(2) << std::hex << (unsigned(ch) & 0xFF);
4546
}
46-
result += "\"";
47-
return result;
47+
result << "\"";
48+
return result.str();
4849
}
4950

5051
int printUsage(int exitCode)
@@ -86,7 +87,9 @@ vector<char32_t> parseChars(std::string_view text)
8687

8788
string prettyAge(unicode::Age age)
8889
{
89-
string str = fmt::format("{}", age);
90+
// clang-format off
91+
string str = [=]() { auto s = ostringstream(); s << age; return s.str(); }();
92+
// clang-format on
9093
assert(str.at(0) == 'V');
9194
str = str.substr(1);
9295
replace(str.begin(), str.end(), '_', '.');
@@ -98,20 +101,20 @@ void showCodepointProperties(char32_t codepoint)
98101
auto const properties = unicode::codepoint_properties::get(codepoint);
99102

100103
// clang-format off
101-
cout << fmt::format("Name : {}\n", unicode::codepoint_properties::name(codepoint));
102-
cout << fmt::format("Unicode Version : {}\n", prettyAge(properties.age));
103-
cout << fmt::format("Codepoint : U+{:X}\n", uint32_t(codepoint));
104-
cout << fmt::format("UTF-8 : {}\n", quotedAndEscaped(unicode::convert_to<char>(codepoint)));
104+
cout << "Name : " << unicode::codepoint_properties::name(codepoint) << '\n';
105+
cout << "Unicode Version : " << prettyAge(properties.age) << '\n';
106+
cout << "Codepoint : U+" << hex << uint32_t(codepoint) << '\n';
107+
cout << "UTF-8 : " << quotedAndEscaped(unicode::convert_to<char>(codepoint)) << '\n';
105108
if (properties.general_category != unicode::General_Category::Control)
106-
cout << fmt::format("Display : {}\n", unicode::convert_to<char>(codepoint));
107-
cout << fmt::format("Plane : {}\n", unicode::plane(codepoint));
108-
cout << fmt::format("Block : {}\n", unicode::block(codepoint));
109-
cout << fmt::format("Script : {}\n", unicode::script(codepoint));
110-
cout << fmt::format("General Category : {}\n", properties.general_category);
111-
cout << fmt::format("East Asian Width : {}\n", properties.east_asian_width);
112-
cout << fmt::format("Character width : {}\n", properties.char_width);
113-
cout << fmt::format("Emoji Segmentation Category : {}\n", properties.emoji_segmentation_category);
114-
cout << fmt::format("Grapheme Cluster Break : {}\n", properties.grapheme_cluster_break);
109+
cout << "Display : " << unicode::convert_to<char>(codepoint) << '\n';
110+
cout << "Plane : " << unicode::plane(codepoint) << '\n';
111+
cout << "Block : " << unicode::block(codepoint) << '\n';
112+
cout << "Script : " << unicode::script(codepoint) << '\n';
113+
cout << "General Category : " << properties.general_category << '\n';
114+
cout << "East Asian Width : " << properties.east_asian_width << '\n';
115+
cout << "Character width : " << properties.char_width << '\n';
116+
cout << "Emoji Segmentation Category : " << properties.emoji_segmentation_category << '\n';
117+
cout << "Grapheme Cluster Break : " << properties.grapheme_cluster_break << '\n';
115118
cout << "\n";
116119
// clang-format on
117120
}

src/unicode/CMakeLists.txt

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
include(GNUInstallDirs)
12

23
# Automatically fetch Unicode database if not present.
34
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ucd.cpp) # if c++ files not auto-generated
@@ -37,7 +38,6 @@ add_library(unicode_ucd STATIC
3738
add_library(unicode::ucd ALIAS unicode_ucd)
3839
target_include_directories(unicode_ucd PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
3940
$<INSTALL_INTERFACE:include>)
40-
target_link_libraries(unicode_ucd PUBLIC fmt::fmt)
4141

4242
# =========================================================================================================
4343

@@ -62,7 +62,7 @@ add_library(unicode_loader STATIC codepoint_properties_loader.h codepoint_proper
6262
add_library(unicode::loader ALIAS unicode_loader)
6363
target_include_directories(unicode_loader PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
6464
$<INSTALL_INTERFACE:include>)
65-
target_link_libraries(unicode_loader PUBLIC fmt::fmt unicode::ucd)
65+
target_link_libraries(unicode_loader PUBLIC unicode::ucd)
6666

6767
# =========================================================================================================
6868

@@ -101,14 +101,15 @@ set_target_properties(unicode PROPERTIES PUBLIC_HEADER "${public_headers}")
101101
add_library(unicode::core ALIAS unicode)
102102
target_include_directories(unicode PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
103103
$<INSTALL_INTERFACE:include>)
104-
target_link_libraries(unicode PUBLIC unicode::ucd fmt::fmt)
104+
target_link_libraries(unicode PUBLIC unicode::ucd)
105105

106106
add_executable(unicode_tablegen tablegen.cpp)
107-
target_link_libraries(unicode_tablegen PRIVATE unicode::loader fmt::fmt)
107+
target_link_libraries(unicode_tablegen PRIVATE unicode::loader)
108108

109109
# {{{ installation
110110
set(LIBUNICODE_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/libunicode" CACHE PATH "Installation directory for cmake files, a relative path that will be joined with ${CMAKE_INSTALL_PREFIX} or an absolute path.")
111111
set(LIBUNICODE_INSTALL_CMAKE_FILES ${MASTER_PROJECT} CACHE BOOL "Decides whether or not to install CMake config and -version files.")
112+
message(NOTICE "HELLO HERE: ${LIBUNICODE_CMAKE_DIR}")
112113

113114
set(INSTALL_TARGETS unicode_ucd unicode_loader unicode)
114115
set(TARGETS_EXPORT_NAME unicode-targets)
@@ -123,6 +124,15 @@ install(TARGETS ${INSTALL_TARGETS}
123124
FRAMEWORK DESTINATION "."
124125
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
125126

127+
install(
128+
FILES
129+
ucd.h
130+
ucd_enums.h
131+
ucd_fmt.h
132+
ucd_ostream.h
133+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/unicode"
134+
)
135+
126136
if(LIBUNICODE_INSTALL_CMAKE_FILES)
127137
set(version "${CMAKE_PROJECT_VERSION}")
128138
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libunicode-config.cmake.in
@@ -158,7 +168,7 @@ if(LIBUNICODE_TESTING)
158168
width_test.cpp
159169
word_segmenter_test.cpp
160170
)
161-
target_link_libraries(unicode_test unicode Catch2::Catch2)
171+
target_link_libraries(unicode_test unicode Catch2::Catch2 fmt::fmt-header-only)
162172
add_test(unicode_test unicode_test)
163173
endif()
164174
# }}}

src/unicode/codepoint_properties_loader.cpp

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,44 +15,16 @@
1515
#include <unicode/support/multistage_table_generator.h>
1616
#include <unicode/support/scoped_timer.h>
1717
#include <unicode/ucd_enums.h>
18-
#include <unicode/ucd_fmt.h>
19-
20-
#include <fmt/format.h>
2118

2219
#include <cassert>
2320
#include <chrono>
21+
#include <fstream>
2422
#include <iostream>
2523
#include <optional>
2624
#include <regex>
2725
#include <string_view>
2826
#include <utility>
2927

30-
// {{{ fmtlib formatters
31-
namespace fmt
32-
{
33-
template <>
34-
struct formatter<unicode::codepoint_properties>
35-
{
36-
template <typename ParseContext>
37-
constexpr auto parse(ParseContext& ctx)
38-
{
39-
return ctx.begin();
40-
}
41-
template <typename FormatContext>
42-
auto format(unicode::codepoint_properties const& value, FormatContext& ctx)
43-
{
44-
return fmt::format_to(ctx.out(),
45-
"({}, {}, {}, {}, {})",
46-
value.emoji() ? "Emoji" : "Text",
47-
value.east_asian_width,
48-
value.script,
49-
value.general_category,
50-
value.grapheme_cluster_break);
51-
}
52-
};
53-
} // namespace fmt
54-
// }}}
55-
5628
using namespace std;
5729
using namespace std::string_view_literals;
5830

@@ -480,7 +452,7 @@ namespace
480452
template <typename T>
481453
void process_properties(string const& filePathSuffix, T callback)
482454
{
483-
auto const _ = scoped_timer { _log, fmt::format("Loading file {}", filePathSuffix) };
455+
auto const _ = scoped_timer { _log, "Loading file " + filePathSuffix };
484456

485457
// clang-format off
486458
// [SPACE] ALNUMDOT ([SPACE] ALNUMDOT)::= (\s+[A-Za-z_0-9\.]+)*

src/unicode/emoji_segmenter.h

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,41 @@ inline std::ostream& operator<<(std::ostream& os, PresentationStyle ps)
126126
return os;
127127
}
128128

129+
inline std::ostream& operator<<(std::ostream& os, EmojiSegmentationCategory value)
130+
{
131+
switch (value)
132+
{
133+
// clang-format off
134+
case unicode::EmojiSegmentationCategory::Invalid: return os << "Invalid";
135+
case unicode::EmojiSegmentationCategory::Emoji: return os << "Emoji";
136+
case unicode::EmojiSegmentationCategory::EmojiTextPresentation: return os << "EmojiTextPresentation";
137+
case unicode::EmojiSegmentationCategory::EmojiEmojiPresentation: return os << "EmojiEmojiPresentation";
138+
case unicode::EmojiSegmentationCategory::EmojiModifierBase: return os << "EmojiModifierBase";
139+
case unicode::EmojiSegmentationCategory::EmojiModifier: return os << "EmojiModifier";
140+
case unicode::EmojiSegmentationCategory::EmojiVSBase: return os << "EmojiVSBase";
141+
case unicode::EmojiSegmentationCategory::RegionalIndicator: return os << "RegionalIndicator";
142+
case unicode::EmojiSegmentationCategory::KeyCapBase: return os << "KeyCapBase";
143+
case unicode::EmojiSegmentationCategory::CombiningEnclosingKeyCap: return os << "CombiningEnclosingKeyCap";
144+
case unicode::EmojiSegmentationCategory::CombiningEnclosingCircleBackslash: return os << "CombiningEnclosingCircleBackslash";
145+
case unicode::EmojiSegmentationCategory::ZWJ: return os << "ZWJ";
146+
case unicode::EmojiSegmentationCategory::VS15: return os << "VS15";
147+
case unicode::EmojiSegmentationCategory::VS16: return os << "VS16";
148+
case unicode::EmojiSegmentationCategory::TagBase: return os << "TagBase";
149+
case unicode::EmojiSegmentationCategory::TagSequence: return os << "TagSequence";
150+
case unicode::EmojiSegmentationCategory::TagTerm: return os << "TagTerm";
151+
// clang-format on
152+
}
153+
return os;
154+
}
155+
129156
} // namespace unicode
130157

131158
// clang-format off
159+
#if __has_include(<fmt/ostream.h>)
132160
#include <fmt/ostream.h>
161+
#if FMT_VERSION >= (9 * 10000 + 1 * 100 + 0)
133162
template <> struct fmt::formatter<unicode::PresentationStyle>: fmt::ostream_formatter {};
134-
// clang-format on
163+
135164

136165
namespace fmt
137166
{
@@ -172,3 +201,6 @@ struct formatter<unicode::EmojiSegmentationCategory>
172201
}
173202
};
174203
}
204+
#endif
205+
#endif
206+
// clang-format on

src/unicode/grapheme_segmenter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
#pragma once
1515

16-
#include <unicode/codepoint_properties_data.h>
16+
#include <unicode/codepoint_properties.h>
1717
#include <unicode/ucd.h>
1818

1919
#include <string_view>

src/unicode/mktables.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,6 @@ def output(self):
200200

201201
def close(self):
202202
self.file.write("} // namespace unicode\n")
203-
self.file.write("\n")
204-
self.file.write("// clang-format off\n")
205-
self.file.write("#include <fmt/ostream.h>\n")
206-
for name in self.names:
207-
self.file.write(f"template <> struct fmt::formatter<unicode::{name}>: fmt::ostream_formatter {{}};\n")
208-
self.file.write("// clang-format off\n")
209203
self.file.close()
210204
# }}}
211205
class EnumFmtWriter(EnumBuilder): # {{{

src/unicode/run_segmenter.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,3 @@ class basic_run_segmenter
178178
using run_segmenter = basic_run_segmenter<script_segmenter, emoji_segmenter>;
179179

180180
} // namespace unicode
181-
182-
// clang-format off
183-
#include <fmt/ostream.h>
184-
template <> struct fmt::formatter<unicode::run_segmenter::range>: fmt::ostream_formatter {};
185-
// clang-format on

src/unicode/run_segmenter_test.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <catch2/catch.hpp>
2222

2323
#include <array>
24+
#include <sstream>
2425
#include <string>
2526
#include <string_view>
2627

@@ -57,11 +58,10 @@ void test_run_segmentation(int lineNo, std::vector<Expectation> const& expectati
5758
unicode::run_segmenter::range actualSegment;
5859
for (size_t i = 0; i < expectations.size(); ++i)
5960
{
60-
INFO(fmt::format("Line {}: run segmentation failed for part {}: \"{}\" to be {}",
61-
lineNo,
62-
i,
63-
to_utf8(expectations[i].text),
64-
expects[i]));
61+
auto s = std::ostringstream {};
62+
s << "Line " << lineNo << ": run segmentation failed for part " << i << ": \""
63+
<< to_utf8(expectations[i].text) << "\" to be " << expects[i];
64+
INFO(s.str());
6565
bool const consumeSuccess = segmenter.consume(out(actualSegment));
6666
REQUIRE(consumeSuccess);
6767
CHECK(actualSegment == expects[i]);

0 commit comments

Comments
 (0)