From aaee2e806d5b4263d37c894dbf6aba7177243f47 Mon Sep 17 00:00:00 2001 From: "TF.Text Team" Date: Fri, 20 Feb 2026 20:59:04 -0800 Subject: [PATCH] No public description PiperOrigin-RevId: 873193850 --- .../core/kernels/spanning_tree_iterator.cc | 23 +++---- .../core/kernels/spanning_tree_iterator.h | 6 +- .../kernels/split_merge_tokenize_kernel.cc | 42 ++++++------- .../kernels/tokenizer_from_logits_kernel.cc | 30 ++++----- .../kernels/unicode_script_tokenize_kernel.cc | 27 ++++---- .../kernels/whitespace_tokenize_kernel.cc | 25 ++++---- .../core/kernels/wordpiece_kernel.cc | 62 +++++++++---------- 7 files changed, 109 insertions(+), 106 deletions(-) diff --git a/tensorflow_text/core/kernels/spanning_tree_iterator.cc b/tensorflow_text/core/kernels/spanning_tree_iterator.cc index 1c859a543..d43f8d25d 100644 --- a/tensorflow_text/core/kernels/spanning_tree_iterator.cc +++ b/tensorflow_text/core/kernels/spanning_tree_iterator.cc @@ -27,14 +27,15 @@ bool SpanningTreeIterator::HasCycle(const SourceList &sources) { visiting_.assign(sources.size(), false); // Search upwards from each node to find cycles. - for (uint32 initial_node = 0; initial_node < sources.size(); ++initial_node) { + for (uint32_t initial_node = 0; initial_node < sources.size(); + ++initial_node) { // Search upwards to try to find a cycle. - uint32 current_node = initial_node; + uint32_t current_node = initial_node; while (true) { if (searched_[current_node]) break; // already searched if (visiting_[current_node]) return true; // revisiting implies cycle visiting_[current_node] = true; // mark as being currently visited - const uint32 source_node = sources[current_node]; + const uint32_t source_node = sources[current_node]; if (source_node == current_node) break; // self-loops are roots current_node = source_node; // advance upwards } @@ -45,7 +46,7 @@ bool SpanningTreeIterator::HasCycle(const SourceList &sources) { if (searched_[current_node]) break; // already searched searched_[current_node] = true; visiting_[current_node] = false; - const uint32 source_node = sources[current_node]; + const uint32_t source_node = sources[current_node]; if (source_node == current_node) break; // self-loops are roots current_node = source_node; // advance upwards } @@ -54,18 +55,18 @@ bool SpanningTreeIterator::HasCycle(const SourceList &sources) { return false; } -uint32 SpanningTreeIterator::NumRoots(const SourceList &sources) { - uint32 num_roots = 0; - for (uint32 node = 0; node < sources.size(); ++node) { +uint32_t SpanningTreeIterator::NumRoots(const SourceList& sources) { + uint32_t num_roots = 0; + for (uint32_t node = 0; node < sources.size(); ++node) { num_roots += (node == sources[node]); } return num_roots; } bool SpanningTreeIterator::NextSourceList(SourceList *sources) { - const uint32 num_nodes = sources->size(); - for (uint32 i = 0; i < num_nodes; ++i) { - const uint32 new_source = ++(*sources)[i]; + const uint32_t num_nodes = sources->size(); + for (uint32_t i = 0; i < num_nodes; ++i) { + const uint32_t new_source = ++(*sources)[i]; if (new_source < num_nodes) return true; // absorbed in this digit (*sources)[i] = 0; // overflowed this digit, carry to next digit } @@ -76,7 +77,7 @@ bool SpanningTreeIterator::NextTree(SourceList *sources) { // Iterate source lists, skipping non-trees. while (NextSourceList(sources)) { // Check the number of roots. - const uint32 num_roots = NumRoots(*sources); + const uint32_t num_roots = NumRoots(*sources); if (forest_) { if (num_roots == 0) continue; } else { diff --git a/tensorflow_text/core/kernels/spanning_tree_iterator.h b/tensorflow_text/core/kernels/spanning_tree_iterator.h index 68bc6f14a..36acdd4ac 100644 --- a/tensorflow_text/core/kernels/spanning_tree_iterator.h +++ b/tensorflow_text/core/kernels/spanning_tree_iterator.h @@ -31,7 +31,7 @@ class SpanningTreeIterator { public: // An array that provides the source of the inbound arc for each node. Roots // are represented as self-loops. - using SourceList = std::vector; + using SourceList = std::vector; // Creates a spanning tree iterator. If |forest| is true, then this iterates // over forests instead of trees (i.e., multiple roots are allowed). @@ -41,7 +41,7 @@ class SpanningTreeIterator { // true) of a complete digraph containing |num_nodes| nodes. Each tree is // passed to the |functor| as a SourceList. template - void ForEachTree(uint32 num_nodes, Functor functor) { + void ForEachTree(uint32_t num_nodes, Functor functor) { // Conveniently, the all-zero vector represents a valid tree. SourceList sources(num_nodes, 0); do { @@ -54,7 +54,7 @@ class SpanningTreeIterator { bool HasCycle(const SourceList &sources); // Returns the number of roots in the |sources|. - static uint32 NumRoots(const SourceList &sources); + static uint32_t NumRoots(const SourceList& sources); // Advances |sources| to the next source list, or returns false if there are // no more source lists. diff --git a/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc b/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc index 5491fab4d..296f244de 100644 --- a/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc +++ b/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc @@ -65,31 +65,31 @@ bool IsBreakChar(absl::string_view text) { return u_isUWhiteSpace(c); } -Status TokenizeByLabel(const absl::string_view& text, - const Tensor& labels_tensor, - bool force_split_at_break_character, - std::vector* tokens, - std::vector* begin_offset, - std::vector* end_offset, int* num_tokens) { +absl::Status TokenizeByLabel(const absl::string_view& text, + const Tensor& labels_tensor, + bool force_split_at_break_character, + std::vector* tokens, + std::vector* begin_offset, + std::vector* end_offset, int* num_tokens) { std::vector chars; if (!GetUTF8Chars(text, &chars)) { - return Status(static_cast<::absl::StatusCode>( - absl::StatusCode::kInvalidArgument), - absl::StrCat("Input string is not utf8 valid: ", text)); + return absl::Status( + static_cast<::absl::StatusCode>(absl::StatusCode::kInvalidArgument), + absl::StrCat("Input string is not utf8 valid: ", text)); } if (chars.size() > labels_tensor.dim_size(0)) { - return Status(static_cast<::absl::StatusCode>( - absl::StatusCode::kInvalidArgument), - absl::StrCat("Number of labels ", labels_tensor.dim_size(0), - " is insufficient for text ", text)); + return absl::Status( + static_cast<::absl::StatusCode>(absl::StatusCode::kInvalidArgument), + absl::StrCat("Number of labels ", labels_tensor.dim_size(0), + " is insufficient for text ", text)); } const int split_label = 0; bool last_character_is_break_character = false; int start = 0; bool has_new_token_generated_for_text = false; - const auto& labels = labels_tensor.unaligned_flat(); + const auto& labels = labels_tensor.unaligned_flat(); for (int i = 0; i < chars.size(); ++i) { const bool is_break_character = IsBreakChar(chars[i]); if (!is_break_character) { @@ -138,14 +138,14 @@ class SplitMergeTokenizeWithOffsetsOp : public OpKernel { " elements, got ", row_splits->dim_size(0))); - std::vector tokens; + std::vector tokens; std::vector begin_offset; std::vector end_offset; std::vector output_row_splits(1, 0); // Iterate through all the values and tokenize them. const auto& values_vec = input_values->flat(); - const auto& row_splits_vec = row_splits->flat(); + const auto& row_splits_vec = row_splits->flat(); for (int i = 0; i < values_vec.size(); ++i) { // Tokenize into tokens and record the offset locations. int num_tokens = 0; @@ -160,10 +160,10 @@ class SplitMergeTokenizeWithOffsetsOp : public OpKernel { output_row_splits.push_back(num_tokens + output_row_splits.back()); } - std::vector output_tokens_shape; + std::vector output_tokens_shape; output_tokens_shape.push_back(tokens.size()); - std::vector output_row_splits_shape; + std::vector output_row_splits_shape; output_row_splits_shape.push_back(output_row_splits.size()); Tensor* output_values; @@ -177,19 +177,19 @@ class SplitMergeTokenizeWithOffsetsOp : public OpKernel { ctx->allocate_output("output_row_splits", TensorShape(output_row_splits_shape), &output_row_splits_tensor)); - auto output_row_splits_vec = output_row_splits_tensor->vec(); + auto output_row_splits_vec = output_row_splits_tensor->vec(); Tensor* start_values; OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values", TensorShape(output_tokens_shape), &start_values)); - auto start_values_vec = start_values->vec(); + auto start_values_vec = start_values->vec(); Tensor* limit_values; OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values", TensorShape(output_tokens_shape), &limit_values)); - auto limit_values_vec = limit_values->vec(); + auto limit_values_vec = limit_values->vec(); for (int i = 0; i < tokens.size(); ++i) { output_values_vec(i) = tokens[i]; diff --git a/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc b/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc index 39262dd94..bfb5af317 100644 --- a/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc +++ b/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc @@ -68,22 +68,22 @@ bool IsBreakChar(absl::string_view text) { // allows us to retrieve the corresponding data from logits. I.e., the logits // for the i-th character from text are logits(batch_index, i, 0) (for the // "split" action) and logits(batch_index, i, 1) (for the "merge" action). -Status TokenizeByLogits(const absl::string_view& text, - const TTypes::Tensor& logits, - int batch_index, - bool force_split_at_break_character, - std::vector* tokens, - std::vector* begin_offset, - std::vector* end_offset, int* num_tokens) { +absl::Status TokenizeByLogits(const absl::string_view& text, + const TTypes::Tensor& logits, + int batch_index, + bool force_split_at_break_character, + std::vector* tokens, + std::vector* begin_offset, + std::vector* end_offset, int* num_tokens) { std::vector chars; if (!GetUTF8Chars(text, &chars)) { - return Status( + return absl::Status( static_cast(absl::StatusCode::kInvalidArgument), absl::StrCat("Input string is not utf8 valid: ", text)); } if (chars.size() > logits.dimension(1)) { - return Status( + return absl::Status( static_cast(absl::StatusCode::kInvalidArgument), absl::StrCat("Number of logits, ", logits.dimension(1), ", is insufficient for text \"", text, "\"")); @@ -142,7 +142,7 @@ class TokenizerFromLogitsOp : public OpKernel { const bool force_split_at_break_character_bool = force_split_at_break_character->scalar()(); - std::vector tokens; + std::vector tokens; std::vector begin_offset; std::vector end_offset; std::vector output_row_splits(1, 0); @@ -175,10 +175,10 @@ class TokenizerFromLogitsOp : public OpKernel { output_row_splits.push_back(num_tokens + output_row_splits.back()); } - std::vector output_tokens_shape; + std::vector output_tokens_shape; output_tokens_shape.push_back(tokens.size()); - std::vector output_row_splits_shape; + std::vector output_row_splits_shape; output_row_splits_shape.push_back(output_row_splits.size()); Tensor* output_values; @@ -192,19 +192,19 @@ class TokenizerFromLogitsOp : public OpKernel { ctx->allocate_output("row_splits", TensorShape(output_row_splits_shape), &output_row_splits_tensor)); - auto output_row_splits_vec = output_row_splits_tensor->vec(); + auto output_row_splits_vec = output_row_splits_tensor->vec(); Tensor* start_values; OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values", TensorShape(output_tokens_shape), &start_values)); - auto start_values_vec = start_values->vec(); + auto start_values_vec = start_values->vec(); Tensor* limit_values; OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values", TensorShape(output_tokens_shape), &limit_values)); - auto limit_values_vec = limit_values->vec(); + auto limit_values_vec = limit_values->vec(); for (int i = 0; i < tokens.size(); ++i) { output_values_vec(i) = tokens[i]; diff --git a/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc b/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc index 6217f1bd5..17863a026 100644 --- a/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc +++ b/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc @@ -58,7 +58,7 @@ class UnicodeScriptTokenizeWithOffsetsOp : public OpKernel { void Compute(OpKernelContext* context) override { // Get inputs const Tensor& input_values_tensor = context->input(0); - const auto input_values_flat = input_values_tensor.flat(); + const auto input_values_flat = input_values_tensor.flat(); const Tensor& input_splits_tensor = context->input(1); const auto input_splits_flat = input_splits_tensor.flat(); @@ -80,10 +80,10 @@ class UnicodeScriptTokenizeWithOffsetsOp : public OpKernel { auto output_outer_splits_flat = output_outer_splits_tensor->flat(); - std::vector output_values; + std::vector output_values; std::vector output_values_inner_splits; - std::vector output_offset_starts; - std::vector output_offset_limits; + std::vector output_offset_starts; + std::vector output_offset_limits; // Loop over the codepoints (a split at a time) and create splits of tokens. icu::ErrorCode status; @@ -92,12 +92,13 @@ class UnicodeScriptTokenizeWithOffsetsOp : public OpKernel { output_outer_splits_flat(splits_idx) = output_offset_starts.size(); UScriptCode prev_script = USCRIPT_INVALID_CODE; bool token_has_start_set = false; - int32 curr_skipped_spaces = 0; // Used when computing the end of a token + int32_t curr_skipped_spaces = + 0; // Used when computing the end of a token const int curr_word_start_idx = input_splits_flat(splits_idx); bool was_space = false; for (int values_idx = curr_word_start_idx; values_idx < input_splits_flat(splits_idx + 1); values_idx++) { - const int32 input_value = input_values_flat(values_idx); + const int32_t input_value = input_values_flat(values_idx); const bool is_space = u_isUWhiteSpace(input_value); UScriptCode script = uscript_getScript(input_value, status); // Split these failures out as if they are a different code and ignore @@ -166,11 +167,11 @@ class UnicodeScriptTokenizeWithOffsetsOp : public OpKernel { do { \ } while (false) - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values, int32); + DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values, int32_t); DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values_inner_splits, SPLITS_TYPE); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_starts, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_limits, int64); + DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_starts, int64_t); + DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_limits, int64_t); #undef DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR } @@ -183,12 +184,12 @@ class UnicodeScriptTokenizeWithOffsetsOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("UnicodeScriptTokenizeWithOffsets") .Device(DEVICE_CPU) - .TypeConstraint("Tsplits"), - UnicodeScriptTokenizeWithOffsetsOp); + .TypeConstraint("Tsplits"), + UnicodeScriptTokenizeWithOffsetsOp); REGISTER_KERNEL_BUILDER(Name("UnicodeScriptTokenizeWithOffsets") .Device(DEVICE_CPU) - .TypeConstraint("Tsplits"), - UnicodeScriptTokenizeWithOffsetsOp); + .TypeConstraint("Tsplits"), + UnicodeScriptTokenizeWithOffsetsOp); } // namespace text } // namespace tensorflow diff --git a/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc b/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc index dcdac0c5f..cd1b03c79 100644 --- a/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc +++ b/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc @@ -54,7 +54,7 @@ class WhitespaceTokenizeWithOffsetsOp : public OpKernel { void Compute(OpKernelContext* context) override { // Get inputs const Tensor& input_values_tensor = context->input(0); - const auto input_values_flat = input_values_tensor.flat(); + const auto input_values_flat = input_values_tensor.flat(); const Tensor& input_splits_tensor = context->input(1); const auto input_splits_flat = input_splits_tensor.flat(); @@ -76,17 +76,18 @@ class WhitespaceTokenizeWithOffsetsOp : public OpKernel { auto output_outer_splits_flat = output_outer_splits_tensor->flat(); - std::vector output_values; + std::vector output_values; std::vector output_values_inner_splits; - std::vector output_offset_starts; - std::vector output_offset_limits; + std::vector output_offset_starts; + std::vector output_offset_limits; // Loop over the codepoints (a split at a time) and create splits of tokens. for (int splits_idx = 0; splits_idx < input_splits_flat.size() - 1; splits_idx++) { output_outer_splits_flat(splits_idx) = output_offset_starts.size(); bool token_has_start_set = false; - int32 curr_skipped_spaces = 0; // Used when computing the end of a token + int32_t curr_skipped_spaces = + 0; // Used when computing the end of a token const int curr_word_start_idx = input_splits_flat(splits_idx); for (int values_idx = curr_word_start_idx; values_idx < input_splits_flat(splits_idx + 1); values_idx++) { @@ -135,11 +136,11 @@ class WhitespaceTokenizeWithOffsetsOp : public OpKernel { auto name##_data = name##_tensor->flat().data(); \ memcpy(name##_data, name.data(), name##_size * sizeof(dtype)); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values, int32); + DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values, int32_t); DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values_inner_splits, SPLITS_TYPE); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_starts, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_limits, int64); + DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_starts, int64_t); + DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_limits, int64_t); #undef DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR } @@ -150,12 +151,12 @@ class WhitespaceTokenizeWithOffsetsOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("WhitespaceTokenizeWithOffsets") .Device(DEVICE_CPU) - .TypeConstraint("Tsplits"), - WhitespaceTokenizeWithOffsetsOp); + .TypeConstraint("Tsplits"), + WhitespaceTokenizeWithOffsetsOp); REGISTER_KERNEL_BUILDER(Name("WhitespaceTokenizeWithOffsets") .Device(DEVICE_CPU) - .TypeConstraint("Tsplits"), - WhitespaceTokenizeWithOffsetsOp); + .TypeConstraint("Tsplits"), + WhitespaceTokenizeWithOffsetsOp); } // namespace text } // namespace tensorflow diff --git a/tensorflow_text/core/kernels/wordpiece_kernel.cc b/tensorflow_text/core/kernels/wordpiece_kernel.cc index 8863d80ab..d4dca1bf3 100644 --- a/tensorflow_text/core/kernels/wordpiece_kernel.cc +++ b/tensorflow_text/core/kernels/wordpiece_kernel.cc @@ -34,25 +34,25 @@ namespace tensorflow { namespace text { namespace { -string GetWordSplitChar(OpKernelConstruction* ctx) { - string suffix_indicator; - ([=](string* c) -> void { +std::string GetWordSplitChar(OpKernelConstruction* ctx) { + std::string suffix_indicator; + ([=](std::string* c) -> void { OP_REQUIRES_OK(ctx, ctx->GetAttr("suffix_indicator", c)); })(&suffix_indicator); return suffix_indicator; } -int32 GetMaxCharsPerWord(OpKernelConstruction* ctx) { - int32 max_chars_per_word; - ([=](int32* c) -> void { +int32_t GetMaxCharsPerWord(OpKernelConstruction* ctx) { + int32_t max_chars_per_word; + ([=](int32_t* c) -> void { OP_REQUIRES_OK(ctx, ctx->GetAttr("max_bytes_per_word", c)); })(&max_chars_per_word); return max_chars_per_word; } -int32 GetMaxCharsPerToken(OpKernelConstruction* ctx) { - int32 max_chars_per_token; - ([=](int32* c) -> void { +int32_t GetMaxCharsPerToken(OpKernelConstruction* ctx) { + int32_t max_chars_per_token; + ([=](int32_t* c) -> void { OP_REQUIRES_OK(ctx, ctx->GetAttr("max_chars_per_token", c)); })(&max_chars_per_token); return max_chars_per_token; @@ -66,9 +66,9 @@ bool GetShouldUseUnknownToken(OpKernelConstruction* ctx) { return use_unknown_token; } -string GetUnknownToken(OpKernelConstruction* ctx) { - string unknown_token; - ([=](string* c) -> void { +std::string GetUnknownToken(OpKernelConstruction* ctx) { + std::string unknown_token; + ([=](std::string* c) -> void { OP_REQUIRES_OK(ctx, ctx->GetAttr("unknown_token", c)); })(&unknown_token); return unknown_token; @@ -82,8 +82,8 @@ bool GetSplitUnknownCharacters(OpKernelConstruction* ctx) { return split_unknown_characters; } -Status GetTableHandle(const string& input_name, OpKernelContext* ctx, - string* container, string* table_handle) { +absl::Status GetTableHandle(const std::string& input_name, OpKernelContext* ctx, + std::string* container, std::string* table_handle) { { mutex* mu; TF_RETURN_IF_ERROR(ctx->input_ref_mutex(input_name, &mu)); @@ -105,10 +105,10 @@ Status GetTableHandle(const string& input_name, OpKernelContext* ctx, // Gets the LookupTable stored in the ctx->resource_manager() with key // passed by attribute with name input_name, returns null if the table // doesn't exist. -Status GetLookupTable(const string& input_name, OpKernelContext* ctx, - lookup::LookupInterface** table) { - string container; - string table_handle; +absl::Status GetLookupTable(const std::string& input_name, OpKernelContext* ctx, + lookup::LookupInterface** table) { + std::string container; + std::string table_handle; DataType handle_dtype; TF_RETURN_IF_ERROR(ctx->input_dtype(input_name, &handle_dtype)); if (handle_dtype == DT_RESOURCE) { @@ -135,7 +135,7 @@ class LookupTableVocab : public WordpieceVocab { Tensor default_value_; }; -Status ToStatus(const LookupStatus& status) { +absl::Status ToStatus(const LookupStatus& status) { if (status.success) { return absl::OkStatus(); } @@ -143,12 +143,12 @@ Status ToStatus(const LookupStatus& status) { return errors::InvalidArgument(status.error_msg); } -constexpr int64 kOutOfVocabValue = -1; +constexpr int64_t kOutOfVocabValue = -1; LookupTableVocab::LookupTableVocab(lookup::LookupInterface* table, OpKernelContext* ctx) : table_(table), ctx_(ctx), default_value_(DT_INT64, TensorShape({1})) { - default_value_.flat()(0) = kOutOfVocabValue; + default_value_.flat()(0) = kOutOfVocabValue; } LookupStatus LookupTableVocab::Contains(const absl::string_view key, @@ -171,7 +171,7 @@ LookupStatus LookupTableVocab::Contains(const absl::string_view key, #endif } - if (static_cast(values.flat()(0)) != kOutOfVocabValue) { + if (static_cast(values.flat()(0)) != kOutOfVocabValue) { *value = true; return LookupStatus::OK(); } @@ -191,7 +191,7 @@ class WordpieceTokenizeWithOffsetsOp : public OpKernel { use_unknown_token_(GetShouldUseUnknownToken(ctx)), unknown_token_(GetUnknownToken(ctx)), split_unknown_characters_(GetSplitUnknownCharacters(ctx)) { - string output_row_partition_type; + std::string output_row_partition_type; OP_REQUIRES_OK(ctx, ctx->GetAttr("output_row_partition_type", &output_row_partition_type)); if (output_row_partition_type == "row_lengths") { @@ -216,7 +216,7 @@ class WordpieceTokenizeWithOffsetsOp : public OpKernel { core::ScopedUnref unref_me(lookup_table); LookupTableVocab vocab_map(lookup_table, ctx); - std::vector subwords; + std::vector subwords; std::vector begin_offset; std::vector end_offset; std::vector row_partition; @@ -247,10 +247,10 @@ class WordpieceTokenizeWithOffsetsOp : public OpKernel { } } - std::vector output_subwords_shape; + std::vector output_subwords_shape; output_subwords_shape.push_back(subwords.size()); - std::vector output_row_partition_shape; + std::vector output_row_partition_shape; output_row_partition_shape.push_back(row_partition.size()); Tensor* output_values; @@ -264,19 +264,19 @@ class WordpieceTokenizeWithOffsetsOp : public OpKernel { ctx->allocate_output("output_row_lengths", TensorShape(output_row_partition_shape), &output_row_partition)); - auto output_row_partition_vec = output_row_partition->vec(); + auto output_row_partition_vec = output_row_partition->vec(); Tensor* start_values; OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values", TensorShape(output_subwords_shape), &start_values)); - auto start_values_vec = start_values->vec(); + auto start_values_vec = start_values->vec(); Tensor* limit_values; OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values", TensorShape(output_subwords_shape), &limit_values)); - auto limit_values_vec = limit_values->vec(); + auto limit_values_vec = limit_values->vec(); for (int i = 0; i < subwords.size(); ++i) { output_values_vec(i) = subwords[i]; @@ -298,11 +298,11 @@ class WordpieceTokenizeWithOffsetsOp : public OpKernel { private: enum RowPartitionType { ROW_LENGTHS, ROW_SPLITS }; - const string suffix_indicator_; + const std::string suffix_indicator_; const int max_bytes_per_word_; const int max_chars_per_token_; const bool use_unknown_token_; - const string unknown_token_; + const std::string unknown_token_; const bool split_unknown_characters_; RowPartitionType row_partition_type_;