Skip to content

Commit 01ebc56

Browse files
authored
Merge pull request #1267 from nukyan/fixes
Fix several wrongs
2 parents 6eaf061 + 162767d commit 01ebc56

5 files changed

Lines changed: 57 additions & 104 deletions

File tree

include/fast_io_core_impl/dynamic_output_buffer.h

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,17 +67,25 @@ write_all_overflow_define_impl(basic_generic_dynamic_output_buffer<char_type, bu
6767
::std::size_t bfsz{static_cast<::std::size_t>(bob.end_ptr - bob.begin_ptr)};
6868
::std::size_t rlsz{static_cast<::std::size_t>(bob.curr_ptr - bob.begin_ptr)};
6969
::std::size_t diff{static_cast<::std::size_t>(last - first)};
70-
::std::size_t to_allocate{bfsz + diff};
71-
::std::size_t twicebfsz;
7270
constexpr ::std::size_t mx{::std::numeric_limits<::std::size_t>::max()};
71+
::std::size_t to_allocate;
72+
if (bfsz > mx - diff)
73+
{
74+
to_allocate = mx;
75+
}
76+
else
77+
{
78+
to_allocate = bfsz + diff;
79+
}
80+
::std::size_t twicebfsz;
7381
constexpr ::std::size_t mxdv2{mx >> 1u};
7482
if (bfsz > mxdv2)
7583
{
7684
twicebfsz = mx;
7785
}
7886
else
7987
{
80-
twicebfsz = bfsz;
88+
twicebfsz = bfsz << 1u;
8189
}
8290
if (to_allocate < twicebfsz)
8391
{

include/fast_io_core_impl/read_all.h

Lines changed: 0 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -34,101 +34,7 @@ read_all_impl_decay_cold(input in, typename input::char_type *first, typename in
3434
}
3535
}
3636

37-
#if 0
38-
template<typename char_type,::std::input_or_output_iterator output_iter>
39-
inline output_iter type_punning_copy(char_type const* first,char_type const* last,output_iter result)
40-
{
41-
using value_type = ::std::iter_value_t<output_iter>;
42-
if constexpr(sizeof(value_type)==0)
43-
return result;
44-
else if constexpr(sizeof(value_type)==sizeof(char_type))
45-
{
46-
for(;first!=last;)
47-
{
48-
if constexpr(::std::same_as<char_type,value_type>)
49-
*result=*first;
50-
else
51-
{
52-
my_memcpy(__builtin_addressof(result),first,sizeof(value_type));
53-
}
54-
++first;
55-
++result;
56-
}
57-
return result;
58-
}
59-
else
60-
{
61-
static_assert(sizeof(char_type)==1);
62-
for(;first!=last;)
63-
{
64-
my_memcpy(__builtin_addressof(result),first,sizeof(value_type));
65-
first+=sizeof(value_type);
66-
++result;
67-
}
68-
return result;
69-
}
70-
}
7137

72-
template<::fast_io::input_stream input,::std::forward_iterator Iter>
73-
inline constexpr void read_all_impl_none_contiguous(input in,Iter first,Iter last)
74-
{
75-
using char_type = typename input::char_type;
76-
using iter_value_type = ::std::iter_value_t<Iter>;
77-
if constexpr(::fast_io::buffer_input_stream<input>)
78-
{
79-
auto to_read{::std::distance(first,last)};
80-
for(;to_read;)
81-
{
82-
auto curr{ibuffer_curr(in)};
83-
auto ed{ibuffer_end(in)};
84-
auto remains{ed-curr};
85-
if constexpr(sizeof(char_type)==sizeof(iter_value_type))
86-
{
87-
if(to_read<remains)
88-
{
89-
remains=to_read;
90-
to_read=0;
91-
}
92-
else
93-
{
94-
to_read-=remains;
95-
}
96-
}
97-
else
98-
{
99-
::std::size_t real_to_read{static_cast<::std::size_t>(to_read)*sizeof(iter_value_type)};
100-
if(real_to_read<remains)
101-
{
102-
remains=real_to_read;
103-
to_read=0;
104-
}
105-
else
106-
{
107-
to_read-=remains/;
108-
}
109-
}
110-
111-
first=type_punning_copy(curr,curr+remains,first);
112-
ibuffer_set_curr(in,curr);
113-
if(curr)
114-
}
115-
}
116-
else
117-
{
118-
constexpr ::std::size_t buffer_size{512};
119-
char_type buffer[buffer_size];
120-
auto to_read{::std::distance(first,last)};
121-
for(;to_read;)
122-
{
123-
::std::size_t read_this_round{buffer_size};
124-
if(to_read>=buffer_size)
125-
{
126-
127-
}
128-
}
129-
}
130-
}
131-
#endif
13238
template <::fast_io::input_stream input>
13339
inline constexpr void read_all_impl_decay(input in, typename input::char_type *first, typename input::char_type *last)
13440
{

include/fast_io_core_impl/simd_find.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ inline constexpr auto create_find_simd_vector_with_unsigned_toggle(char_type val
77
{
88
::fast_io::freestanding::array<char_type, N> arr;
99
using signed_char_type = ::std::make_signed_t<char_type>;
10-
using unsigned_char_type = ::std::make_signed_t<char_type>;
10+
using unsigned_char_type = ::std::make_unsigned_t<char_type>;
1111
constexpr unsigned_char_type signed_min_unsigned_val{
1212
static_cast<unsigned_char_type>(::std::numeric_limits<signed_char_type>::min())};
1313
if constexpr (signed_disposition)
@@ -213,7 +213,7 @@ inline constexpr char_type const *find_space_simd_common_impl(char_type const *f
213213
else
214214
{
215215
using unsigned_char_type = ::std::make_unsigned_t<::std::remove_cvref_t<char_type>>;
216-
using signed_char_type = ::std::make_unsigned_t<unsigned_char_type>;
216+
using signed_char_type = ::std::make_signed_t<unsigned_char_type>;
217217
constexpr char_type spacech{char_literal_v<u8' ', ::std::remove_cvref_t<char_type>>};
218218
constexpr char_type horizontaltab{char_literal_v<u8'\t', ::std::remove_cvref_t<char_type>>};
219219
constexpr char_type verticaltab{char_literal_v<u8'\v', ::std::remove_cvref_t<char_type>>};

include/fast_io_crypto/cipher/aes.h

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct aes
5858
{
5959
inline static constexpr ::std::size_t block_size = 16;
6060
inline static constexpr ::std::size_t key_size = keysize;
61-
inline static constexpr ::std::size_t key_schedule_size = keysize == 16 ? 10 : (keysize == 24 ? 12 : 15);
61+
inline static constexpr ::std::size_t key_schedule_size = keysize == 16 ? 11 : (keysize == 24 ? 13 : 15);
6262
__m128i key_schedule[key_schedule_size];
6363
inline explicit aes(::std::span<::std::byte const, key_size> key_span) noexcept
6464
{
@@ -135,9 +135,49 @@ struct aes
135135
key_schedule[13] = aes_256_key_exp_2(key_schedule[11], key_schedule[12]);
136136
key_schedule[14] = aes_256_key_exp(key_schedule[12], key_schedule[13], 0x40);
137137
}
138+
if constexpr (decrypt)
139+
{
140+
// Prepare decryption key schedule: reverse order and apply InvMixColumns
141+
for (::std::size_t i{}, j{key_schedule_size - 1}; i < j; ++i, --j)
142+
{
143+
__m128i tmp = key_schedule[i];
144+
key_schedule[i] = key_schedule[j];
145+
key_schedule[j] = tmp;
146+
}
147+
for (::std::size_t i{1}; i < key_schedule_size - 1; ++i)
148+
{
149+
key_schedule[i] = _mm_aesimc_si128(key_schedule[i]);
150+
}
151+
}
138152
}
139153
inline void operator()(::std::byte const *from, ::std::size_t blocks, ::std::byte *to) noexcept
140-
{}
154+
{
155+
constexpr ::std::size_t rounds = key_schedule_size - 1;
156+
for (::std::size_t i{}; i != blocks; ++i)
157+
{
158+
__m128i block = _mm_loadu_si128(reinterpret_cast<__m128i const *>(from));
159+
block = _mm_xor_si128(block, key_schedule[0]);
160+
if constexpr (!decrypt)
161+
{
162+
for (::std::size_t j{1}; j != rounds; ++j)
163+
{
164+
block = _mm_aesenc_si128(block, key_schedule[j]);
165+
}
166+
block = _mm_aesenclast_si128(block, key_schedule[rounds]);
167+
}
168+
else
169+
{
170+
for (::std::size_t j{1}; j != rounds; ++j)
171+
{
172+
block = _mm_aesdec_si128(block, key_schedule[j]);
173+
}
174+
block = _mm_aesdeclast_si128(block, key_schedule[rounds]);
175+
}
176+
_mm_storeu_si128(reinterpret_cast<__m128i *>(to), block);
177+
from += block_size;
178+
to += block_size;
179+
}
180+
}
141181
};
142182

143183
} // namespace fast_io

include/fast_io_i18n/lc_numbers/cond.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,11 @@ inline constexpr char_type *cond_lc_print_reserve_define_impl(basic_lc_all<char_
102102
}
103103
else if constexpr (reserve_printable<char_type, T1>)
104104
{
105-
constexpr ::std::size_t sz{print_reserve_size(io_reserve_type<char_type, T1>)};
106-
return sz;
105+
return print_reserve_define(io_reserve_type<char_type, T1>, iter, c);
107106
}
108107
else
109108
{
110-
return print_reserve_size(io_reserve_type<char_type, T1>, c);
109+
return print_reserve_define(io_reserve_type<char_type, T1>, iter, c);
111110
}
112111
}
113112

0 commit comments

Comments
 (0)