diff --git a/.github/workflows/cloud_code_scan.yml b/.github/workflows/cloud_code_scan.yml new file mode 100644 index 0000000000..ee6329165f --- /dev/null +++ b/.github/workflows/cloud_code_scan.yml @@ -0,0 +1,14 @@ +name: Alipay Cloud Devops Codescan +on: + pull_request_target: + +jobs: + deployment: + runs-on: ubuntu-latest + steps: + - name: codeScan + if: ${{ github.repository == 'TuGraph-family/tugraph-db' }} + uses: TuGraph-family/alipay-cloud-devops-codescan@main + with: + parent_uid: ${{ secrets.ALI_PID }} + private_key: ${{ secrets.ALI_PK }} diff --git a/deps/geax-front-end/cmake/Modules/FindAntlr4.cmake b/deps/geax-front-end/cmake/Modules/FindAntlr4.cmake index 43f261b08f..740f09735d 100644 --- a/deps/geax-front-end/cmake/Modules/FindAntlr4.cmake +++ b/deps/geax-front-end/cmake/Modules/FindAntlr4.cmake @@ -1,9 +1,8 @@ -#if ("$ENV{JAVA_HOME}" STREQUAL "") -# set(Java_ROOT ${GEAX_THIRD_PARTY_DIR}/jdk-11.0.2) -#else() -# set(Java_ROOT "$ENV{JAVA_HOME}") -#endif() -set(Java_ROOT /usr/lib/jvm/java-11-openjdk-11.0.13.0.8-1.el8_4.x86_64) +if ("$ENV{JAVA_HOME}" STREQUAL "") + set(Java_ROOT ${GEAX_THIRD_PARTY_DIR}/jdk-11.0.2) +else() + set(Java_ROOT "$ENV{JAVA_HOME}") +endif() set(Java_JAVA_EXECUTABLE ${Java_ROOT}/bin/java) find_package(Java QUIET COMPONENTS Runtime) diff --git a/src/core/field_extractor.cpp b/src/core/field_extractor.cpp index ee4cc04cb2..89b78a5aba 100644 --- a/src/core/field_extractor.cpp +++ b/src/core/field_extractor.cpp @@ -16,6 +16,74 @@ namespace lgraph { namespace _detail { + +void FieldExtractor::_SetFixedSizeValueRaw(Value& record, const Value& data) const { + // "Cannot call SetField(Value&, const T&) on a variable length field"; + FMA_DBG_ASSERT(!is_vfield_); + // "Type size mismatch" + FMA_DBG_CHECK_EQ(data.Size(), field_data_helper::FieldTypeSize(def_.type)); + FMA_DBG_CHECK_EQ(data.Size(), GetDataSize(record)); + // copy the buffer so we don't accidentally overwrite memory + char* ptr = (char*)record.Data() + GetFieldOffset(record, def_.id); + memcpy(ptr, data.Data(), data.Size()); +} + +// set field value to null +void FieldExtractor::SetIsNull(const Value& record, const bool is_null) const { + if (!def_.optional) { + if (is_null) throw FieldCannotBeSetNullException(Name()); + return; + } + // set the Kth bit from NullArray + char* arr = GetNullArray(record); + if (is_null) { + arr[def_.id / 8] |= (0x1 << (def_.id % 8)); + } else { + arr[def_.id / 8] &= ~(0x1 << (def_.id % 8)); + } +} + +size_t FieldExtractor::GetDataSize(const Value& record) const { + if (is_vfield_) { + DataOffset var_offset = ::lgraph::_detail::UnalignedGet( + record.Data() + GetFieldOffset(record, def_.id)); + // The length is stored at the beginning of the variable-length field data area. + return ::lgraph::_detail::UnalignedGet(record.Data() + var_offset); + } else { + return GetFieldOffset(record, def_.id + 1) - GetFieldOffset(record, def_.id); + } +} + +FieldId FieldExtractor::GetRecordCount(const Value& record) const { + return ::lgraph::_detail::UnalignedGet(record.Data() + count_offset_); +} + +/** Retrieve the starting position of the Field data for the given ID. + * Note that both fixed-length and variable-length data are not distinguished here. + */ +size_t FieldExtractor::GetFieldOffset(const Value& record, const FieldId id) const { + const uint16_t count = GetRecordCount(record); + if (0 == id) { + // The starting position of Field0 is at the end of the offset section. + return nullarray_offset_ + (count + 7) / 8 + count * sizeof(DataOffset); + } + + size_t offset = 0; + offset = nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset); + return ::lgraph::_detail::UnalignedGet(record.Data() + offset); +} + +size_t FieldExtractor::GetOffsetPosition(const Value& record, const FieldId id) const { + const FieldId count = GetRecordCount(record); + if (0 == id) { + return 0; + } + return nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset); +} +void* FieldExtractor::GetFieldPointer(const Value& record) const { + return (char*)record.Data() + GetFieldOffset(record, def_.id); +} + /** * Print the string representation of the field. For digital types, it prints * it into ASCII string; for NBytes and String, it just copies the content of diff --git a/src/core/field_extractor.h b/src/core/field_extractor.h index 5802cdbfec..17486d7f96 100644 --- a/src/core/field_extractor.h +++ b/src/core/field_extractor.h @@ -143,7 +143,7 @@ class FieldExtractor { void SetLabelInRecord(const bool label_in_record) { label_in_record_ = label_in_record; - count_offset_ = sizeof(VersionId) + label_in_record ? sizeof(LabelId) : 0; + count_offset_ = sizeof(VersionId) + (label_in_record ? sizeof(LabelId) : 0); nullarray_offset_ = count_offset_ + sizeof(FieldId); } @@ -353,43 +353,19 @@ class FieldExtractor { return Value(decoded); } - template - void _ParseStringAndSet(Value& record, const std::string& data) const; - void SetVariableOffset(Value& record, FieldId id, DataOffset offset) const { size_t off = GetFieldOffset(record, id); ::lgraph::_detail::UnalignedSet(record.Data() + off, offset); } - void _SetFixedSizeValueRaw(Value& record, const Value& data) const { - // "Cannot call SetField(Value&, const T&) on a variable length field"; - FMA_DBG_ASSERT(!is_vfield_); - // "Type size mismatch" - FMA_DBG_CHECK_EQ(data.Size(), field_data_helper::FieldTypeSize(def_.type)); - FMA_DBG_CHECK_EQ(data.Size(), GetDataSize(record)); - // copy the buffer so we don't accidentally overwrite memory - char* ptr = (char*)record.Data() + GetFieldOffset(record, def_.id); - memcpy(ptr, data.Data(), data.Size()); - } + void _SetFixedSizeValueRaw(Value& record, const Value& data) const; // set field value to null - void SetIsNull(const Value& record, const bool is_null) const { - if (!def_.optional) { - if (is_null) throw FieldCannotBeSetNullException(Name()); - return; - } - // set the Kth bit from NullArray - char* arr = GetNullArray(record); - if (is_null) { - arr[def_.id / 8] |= (0x1 << (def_.id % 8)); - } else { - arr[def_.id / 8] &= ~(0x1 << (def_.id % 8)); - } - } + void SetIsNull(const Value& record, const bool is_null) const; /** * Extracts field data from the record to the buffer pointed to by data. This - * is for internal use only, the size MUST match the data size defined in schema. + * is for internal use only, the size MUST match the data size in record. * * \param record The record. * \param [in,out] data If non-null, the data. @@ -399,54 +375,28 @@ class FieldExtractor { */ void GetCopyRaw(const Value& record, void* data, size_t size) const { size_t off = GetFieldOffset(record, def_.id); - FMA_DBG_ASSERT(off + size <= record.Size()); - memcpy(data, record.Data() + off, size); - } - - char* GetNullArray(const Value& record) const { return record.Data() + nullarray_offset_; } - - size_t GetDataSize(const Value& record) const { if (is_vfield_) { - DataOffset var_offset = ::lgraph::_detail::UnalignedGet( - record.Data() + GetFieldOffset(record, def_.id)); - DataOffset var_data_offset = - ::lgraph::_detail::UnalignedGet(record.Data() + var_offset); - // The length is stored at the beginning of the variable-length field data area. - return ::lgraph::_detail::UnalignedGet(record.Data() + var_data_offset); + off = ::lgraph::_detail::UnalignedGet(record.Data() + off); + FMA_DBG_ASSERT(off + size + sizeof(DataOffset) <= record.Size()); + memcpy(data, record.Data() + off + sizeof(DataOffset), size); } else { - return GetFieldOffset(record, def_.id + 1) - GetFieldOffset(record, def_.id); + FMA_DBG_ASSERT(off + size <= record.Size()); + memcpy(data, record.Data() + off, size); } } - FieldId GetRecordCount(const Value& record) const { - return ::lgraph::_detail::UnalignedGet(record.Data() + count_offset_); - } + char* GetNullArray(const Value& record) const { return record.Data() + nullarray_offset_; } + + size_t GetDataSize(const Value& record) const; + + FieldId GetRecordCount(const Value& record) const; /** Retrieve the starting position of the Field data for the given ID. * Note that both fixed-length and variable-length data are not distinguished here. */ - size_t GetFieldOffset(const Value& record, const FieldId id) const { - const uint16_t count = GetRecordCount(record); - if (0 == id) { - // The starting position of Field0 is at the end of the offset section. - return nullarray_offset_ + (count + 7) / 8 + count * sizeof(DataOffset); - } - - size_t offset = 0; - offset = nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset); - return ::lgraph::_detail::UnalignedGet(record.Data() + offset); - } - - size_t GetOffsetPosition(const Value& record, const FieldId id) const { - const FieldId count = GetRecordCount(record); - if (0 == id) { - return 0; - } - return nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset); - } - void* GetFieldPointer(const Value& record) const { - return (char*)record.Data() + GetFieldOffset(record, def_.id); - } + size_t GetFieldOffset(const Value& record, const FieldId id) const; + size_t GetOffsetPosition(const Value& record, const FieldId id) const; + void* GetFieldPointer(const Value& record) const; }; } // namespace _detail diff --git a/src/core/schema.cpp b/src/core/schema.cpp index a70654130c..be0a947aeb 100644 --- a/src/core/schema.cpp +++ b/src/core/schema.cpp @@ -737,14 +737,17 @@ void Schema::_SetVariableLengthValue(Value& record, const Value& data, record.Resize(record.Size()); // move data to the correct position - int32_t diff = data.Size() + sizeof(uint32_t) - fsize; + int32_t diff = data.Size() - fsize; if (diff > 0) { record.Resize(record.Size() + diff); - memmove(rptr + variable_offset + sizeof(data), rptr + variable_offset + fsize, - record.Size() - (variable_offset + sizeof(data))); + rptr = (char*)record.Data(); + memmove(rptr + variable_offset + sizeof(DataOffset) + data.Size(), + rptr + variable_offset + sizeof(DataOffset) + fsize, + record.Size() - (variable_offset + sizeof(DataOffset) + data.Size())); } else { - memmove(rptr + variable_offset + sizeof(data), rptr + variable_offset + fsize, - record.Size() - (variable_offset + fsize)); + memmove(rptr + variable_offset + sizeof(DataOffset) + data.Size(), + rptr + variable_offset + sizeof(DataOffset) + fsize, + record.Size() - (variable_offset + sizeof(DataOffset) + fsize)); record.Resize(record.Size() + diff); } @@ -758,7 +761,7 @@ void Schema::_SetVariableLengthValue(Value& record, const Value& data, // update offset of other veriable fields size_t count = extractor->GetRecordCount(record); // adjust offset of other fields - for (size_t i = extractor->GetFieldId(); i < count; i++) { + for (size_t i = extractor->GetFieldId() + 1; i < count; i++) { if (fields_[i].IsFixedType()) continue; size_t offset = extractor->GetFieldOffset(record, i); size_t var_offset = ::lgraph::_detail::UnalignedGet(rptr + offset); @@ -783,7 +786,6 @@ void Schema::_ParseStringAndSet(Value& record, const std::string& data, typedef typename field_data_helper::FieldType2StorageType::type ST; CT s{}; size_t tmp = fma_common::TextParserUtils::ParseT(data.data(), data.data() + data.size(), s); - // error maybe there if (_F_UNLIKELY(tmp != data.size())) throw ParseStringException(extractor->Name(), data, FT); return SetFixedSizeValue(record, static_cast(s), extractor); } @@ -1038,12 +1040,11 @@ void Schema::SetSchema(bool is_vertex, size_t n_fields, const FieldSpec* fields, const EdgeConstraints& edge_constraints) { lgraph::CheckValidFieldNum(n_fields); fields_.clear(); + blob_fields_.clear(); name_to_idx_.clear(); fields_.reserve(n_fields); for (size_t i = 0; i < n_fields; i++) { fields_.emplace_back(fields[i]); - fields_[i].SetLabelInRecord(label_in_record_); - name_to_idx_[fields[i].name] = fields[i].id; } std::sort(fields_.begin(), fields_.end(), [](const _detail::FieldExtractor& a, const _detail::FieldExtractor& b) { @@ -1055,6 +1056,17 @@ void Schema::SetSchema(bool is_vertex, size_t n_fields, const FieldSpec* fields, throw FieldIdConflictException(fields_[i].Name(), fields_[i - 1].Name()); } } + for (auto& f : fields_) { + if (f.Type() == FieldType::NUL) throw FieldCannotBeNullTypeException(f.Name()); + if (_F_UNLIKELY(name_to_idx_.find(f.Name()) != name_to_idx_.end())) + throw FieldAlreadyExistsException(f.Name()); + name_to_idx_[f.Name()] = f.GetFieldId(); + if (f.Type() == FieldType::BLOB) { + blob_fields_.push_back(f.GetFieldId()); + } + f.SetLabelInRecord(label_in_record_); + } + is_vertex_ = is_vertex; primary_field_ = primary; temporal_field_ = temporal; diff --git a/src/core/schema.h b/src/core/schema.h index 3344cfb143..5bd62837c7 100644 --- a/src/core/schema.h +++ b/src/core/schema.h @@ -626,45 +626,8 @@ class Schema { s = BinaryRead(buf, detach_property_); if (!s) return 0; bytes_read += s; - FieldId pro_count = 0; - fields_.reserve(fds.size()); - name_to_idx_.clear(); - indexed_fields_.clear(); - fulltext_fields_.clear(); - bool found_primary = false; - for (const auto& f : fds) { - fields_[f.id] = _detail::FieldExtractor(f); - fields_[f.id].SetLabelInRecord(label_in_record_); - if (f.id >= pro_count) { - pro_count = f.id; - } - if (_F_UNLIKELY(name_to_idx_.find(f.name) != name_to_idx_.end())) { - throw FieldAlreadyExistsException(f.name); - } - name_to_idx_[f.name] = f.id; - if (fields_[f.id].GetVertexIndex() || fields_[f.id].GetEdgeIndex()) { - indexed_fields_.emplace_hint(indexed_fields_.end(), f.id); - if (f.name == primary_field_) { - FMA_ASSERT(!found_primary); - found_primary = true; - } - } - if (fields_[f.id].FullTextIndexed()) { - fulltext_fields_.emplace(f.id); - } - } - - if (is_vertex_ && !indexed_fields_.empty()) { - FMA_ASSERT(found_primary); - } - - if (pro_count != fds.size() - 1) { - std::string err_msg = - FMA_FMT("Schema fields deserialize error, fields num: {}, max id: {}.", - _detail::MAX_GRAPH_SIZE, fds.size(), pro_count); - throw std::runtime_error(err_msg); - } - + SetSchema(is_vertex_, fds, primary_field_, temporal_field_, temporal_order_, + edge_constraints_); return bytes_read; } diff --git a/test/test_schema.cpp b/test/test_schema.cpp index 9f7cf4ca64..9c6b0bc846 100644 --- a/test/test_schema.cpp +++ b/test/test_schema.cpp @@ -141,6 +141,7 @@ TEST_F(TestSchema, GetFieldId) { TEST_F(TestSchema, DumpRecord) { Value v_old("name"); + Value v_new("name1"); Schema schema(false); Schema schema_1(true); @@ -194,12 +195,13 @@ TEST_F(TestSchema, DumpRecord) { std::vector value{"peter", "101", "65.25", "49", "fifth avenue"}; Value record = schema.CreateRecord(fid.size(), fid.data(), value.data()); // UT_LOG() << "record: " << schema.DumpRecord(record); - schema.GetFieldId("float"); - schema.GetFieldExtractor("name"); - schema.GetFieldExtractor("uid"); - schema.GetFieldExtractor("weight"); - schema.GetFieldExtractor("age"); - schema.GetFieldExtractor("addr"); + + UT_EXPECT_EQ(schema.GetFieldId("float"), 5); + UT_EXPECT_EQ(schema.GetFieldExtractor("name")->FieldToString(record), "peter"); + UT_EXPECT_EQ(schema.GetFieldExtractor("uid")->FieldToString(record), "101"); + UT_EXPECT_EQ(schema.GetFieldExtractor("weight")->FieldToString(record), "6.525e1"); + UT_EXPECT_EQ(schema.GetFieldExtractor("age")->FieldToString(record), "49"); + UT_EXPECT_EQ(schema.GetFieldExtractor("addr")->FieldToString(record), "fifth avenue"); UT_EXPECT_THROW_CODE(schema.GetFieldExtractor("hash"), FieldNotFound); UT_EXPECT_THROW_CODE(schema.GetFieldExtractor(1024), FieldNotFound); const _detail::FieldExtractor fe_temp = *(schema.GetFieldExtractor("name"));