Skip to content

Commit 85e501e

Browse files
authored
Merge branch 'main' into reusable_state
2 parents 1cf3b79 + ac385d8 commit 85e501e

28 files changed

+1279
-697
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ install_manifest.txt
2626
*.so
2727
*.dylib
2828
*.dll
29+
lttoolbox/liblttoolbox.so.*
2930

3031
# Fortran module files
3132
*.mod
@@ -46,6 +47,7 @@ install_manifest.txt
4647
/lttoolbox/lt-comp
4748
/lttoolbox/lt-compose
4849
/lttoolbox/lt-proc
50+
/lttoolbox/lt-merge
4951
/lttoolbox/lt-trim
5052
/lttoolbox/Makefile
5153
/lttoolbox/Makefile.in

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
22
cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
33
project(lttoolbox
4-
VERSION 3.7.14
4+
VERSION 3.8.2
55
LANGUAGES CXX C
66
)
77
set(VERSION ${PROJECT_VERSION})

cmake.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
set -e
33
args=()
44

5-
while [[ $# > 0 ]];
5+
while [[ $# -gt 0 ]];
66
do
77
case "$1" in
88
--prefix)

lttoolbox/CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(LIBLTTOOLBOX_HEADERS
2626
sorted_vector.h
2727
sorted_vector.hpp
2828
state.h
29+
stream_reader.h
2930
string_utils.h
3031
symbol_iter.h
3132
tmx_compiler.h
@@ -57,6 +58,7 @@ set(LIBLTTOOLBOX_SOURCES
5758
reusable_state.cc
5859
sorted_vector.cc
5960
state.cc
61+
stream_reader.cc
6062
string_utils.cc
6163
symbol_iter.cc
6264
tmx_compiler.cc
@@ -103,6 +105,9 @@ target_link_libraries(lt-comp lttoolbox ${GETOPT_LIB})
103105
add_executable(lt-proc lt_proc.cc)
104106
target_link_libraries(lt-proc lttoolbox ${GETOPT_LIB})
105107

108+
add_executable(lt-merge lt_merge.cc)
109+
target_link_libraries(lt-merge lttoolbox ${GETOPT_LIB})
110+
106111
add_executable(lt-expand lt_expand.cc)
107112
target_link_libraries(lt-expand lttoolbox ${GETOPT_LIB})
108113

@@ -144,11 +149,11 @@ install(TARGETS lttoolbox
144149
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
145150
install(FILES ${LIBLTTOOLBOX_HEADERS}
146151
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lttoolbox)
147-
install(TARGETS lt-append lt-print lt-trim lt-compose lt-comp lt-proc lt-expand lt-paradigm lt-tmxcomp lt-tmxproc lt-invert lt-restrict lt-apply-acx
152+
install(TARGETS lt-append lt-print lt-trim lt-compose lt-comp lt-proc lt-merge lt-expand lt-paradigm lt-tmxcomp lt-tmxproc lt-invert lt-restrict lt-apply-acx
148153
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
149154

150155
install(FILES dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd
151156
DESTINATION ${CMAKE_INSTALL_DATADIR}/lttoolbox)
152157

153-
install(FILES lt-append.1 lt-comp.1 lt-expand.1 lt-paradigm.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1 lt-trim.1 lt-compose.1
158+
install(FILES lt-append.1 lt-comp.1 lt-expand.1 lt-paradigm.1 lt-proc.1 lt-merge.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1 lt-trim.1 lt-compose.1
154159
DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)

lttoolbox/att_compiler.cc

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
#include <lttoolbox/alphabet.h>
2020
#include <lttoolbox/transducer.h>
2121
#include <lttoolbox/compression.h>
22-
#include <lttoolbox/string_utils.h>
2322
#include <lttoolbox/file_utils.h>
2423
#include <algorithm>
24+
#include <limits>
2525
#include <stack>
2626
#include <unicode/uchar.h>
2727
#include <unicode/ustring.h>
@@ -141,6 +141,53 @@ AttCompiler::add_transition(int from, int to,
141141
}
142142
}
143143

144+
/*
145+
* ICU number parsing does a lot of locale handling and this tends to
146+
* dominate the runtime of lt-comp. Since we always force the locale
147+
* to be C.UTF-8, we can make stronger assumptions leading to a
148+
* ~2000x speedup for stoi and ~500x for stod relative to StringUtils.
149+
* - DGS 2025-08-29
150+
*/
151+
152+
/*
 * Parse a non-negative base-10 integer consisting solely of the ASCII
 * digits '0'..'9'. No sign, whitespace, or locale handling is performed.
 *
 * Throws std::invalid_argument if s is empty, contains any non-digit
 * character, or encodes a value that does not fit in an int. A single
 * exception type is used on purpose: the call sites in
 * AttCompiler::parse only catch std::invalid_argument.
 */
int fast_stoi(UString s) {
  // An empty token is not a number; silently returning 0 would hide
  // malformed input lines.
  if (s.empty()) throw std::invalid_argument("empty string");
  int ret = 0;
  for (size_t i = 0; i < s.size(); i++) {
    if (s[i] < '0' || s[i] > '9') throw std::invalid_argument("bad int");
    const int digit = s[i] - '0';
    // Check before multiplying: signed overflow is undefined behaviour.
    if (ret > (std::numeric_limits<int>::max() - digit) / 10) {
      throw std::invalid_argument("int overflow");
    }
    ret = ret * 10 + digit;
  }
  return ret;
}
161+
162+
double fast_stod(UString s) {
163+
if (s.size() == 0) throw std::invalid_argument("empty string");
164+
double sign = 1;
165+
size_t i = 0;
166+
if (s[i] == '-') {
167+
i++;
168+
sign = -1;
169+
}
170+
if (i == s.size()) throw std::invalid_argument("no number");
171+
if (i + 3 == s.size() && s[i] == 'i' && s[i+1] == 'n' && s[i+2] == 'f') {
172+
return sign * std::numeric_limits<double>::infinity();
173+
}
174+
double ret = 0;
175+
for (; i < s.size(); i++) {
176+
if (s[i] == '.') break;
177+
if (s[i] < '0' || s[i] > '9') throw std::invalid_argument("bad digit");
178+
ret *= 10;
179+
ret += (s[i] - '0');
180+
}
181+
i++;
182+
double mul = 0.1;
183+
for (; i < s.size(); i++) {
184+
if (s[i] < '0' || s[i] > '9') throw std::invalid_argument("bad digit");
185+
ret += (s[i] - '0') * mul;
186+
mul *= 0.1;
187+
}
188+
return sign * ret;
189+
}
190+
144191
void
145192
AttCompiler::parse(std::string const &file_name, bool read_rl)
146193
{
@@ -208,7 +255,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl)
208255
}
209256

210257
try {
211-
from = StringUtils::stoi(tokens[0]) + state_id_offset;
258+
from = fast_stoi(tokens[0]) + state_id_offset;
212259
} catch (const std::invalid_argument& e) {
213260
std::cerr << "Error: invalid source state in file '" << file_name << "' on line " << line_number << "." << std::endl;
214261
exit(EXIT_FAILURE);
@@ -233,7 +280,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl)
233280
if (tokens.size() > 1)
234281
{
235282
try {
236-
weight = StringUtils::stod(tokens[1]);
283+
weight = fast_stod(tokens[1]);
237284
} catch (const std::invalid_argument& e) {
238285
std::cerr << "Error: invalid weight in file '" << file_name << "' on line " << line_number << "." << std::endl;
239286
exit(EXIT_FAILURE);
@@ -248,7 +295,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl)
248295
else
249296
{
250297
try {
251-
to = StringUtils::stoi(tokens[1]) + state_id_offset;
298+
to = fast_stoi(tokens[1]) + state_id_offset;
252299
} catch (const std::invalid_argument& e) {
253300
std::cerr << "Error: invalid target state in file '" << file_name << "' on line " << line_number << "." << std::endl;
254301
exit(EXIT_FAILURE);
@@ -269,7 +316,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl)
269316
if(tokens.size() > 4)
270317
{
271318
try {
272-
weight = StringUtils::stod(tokens[4]);
319+
weight = fast_stod(tokens[4]);
273320
} catch (const std::invalid_argument& e) {
274321
std::cerr << "Error: invalid weight in file '" << file_name << "' on line " << line_number << "." << std::endl;
275322
exit(EXIT_FAILURE);

0 commit comments

Comments
 (0)