diff --git a/include/opendht/indexation/pht.h b/include/opendht/indexation/pht.h index 0d679414bfdf3fdeb97ecf041e2a16f282db6c5c..1c5557af5dd66e762c643a287996e7e47625d352 100644 --- a/include/opendht/indexation/pht.h +++ b/include/opendht/indexation/pht.h @@ -35,8 +35,9 @@ struct Prefix { size_(std::min(first, p.content_.size()*8)), content_(Blob(p.content_.begin(), p.content_.begin()+size_/8)) { + auto rem = size_ % 8; - if ( not flags_.empty() ) { + if ( not p.flags_.empty() ) { flags_ = Blob(p.flags_.begin(), p.flags_.begin()+size_/8); if (rem) flags_.push_back(p.flags_[size_/8] & (0xFF << (8 - rem))); @@ -46,25 +47,40 @@ struct Prefix { content_.push_back(p.content_[size_/8] & (0xFF << (8 - rem))); } + /** + * Get a sub prefix of the Prefix + * + * @param len length of the prefix to get, could be negative + * if len is negative then you will get the prefix + * of size size_ + len, i.e. size_ minus |len| + * + * @return Sub-prefix of size len or, if len is negative, sub-prefix of size + * size_ minus |len| + * + * @throw out_of_range if |len| is not below content_.size() * 8 + */ Prefix getPrefix(ssize_t len) const { if ((size_t)std::abs(len) >= content_.size() * 8) throw std::out_of_range("len larger than prefix size."); if (len < 0) len += size_; + return Prefix(*this, len); } /** - * Method for getting the state of the bit at the position pos. - * @param pos : Pos of the needed bit - * @return : true if the bit is at 1 - * false otherwise - * @throw out_of_range Throw out of range if the bit at 'pos' does not exist + * A flag is considered active if flags_ is empty or if the flag + * at pos 'pos' is active + * + * @see isActiveBit in private function */ bool isFlagActive(size_t pos) const { return flags_.empty() or isActiveBit(flags_, pos); } + /** + * @see isActiveBit in private function + */ bool isContentBitActive(size_t pos) const { return isActiveBit(content_, pos); } @@ -77,9 +93,11 @@ struct Prefix { * @return The prefix of this sibling. 
*/ Prefix getSibling() const { - if ( not size_ ) - return Prefix(*this); - return swapBit(size_); + Prefix copy = *this; + if ( size_ ) + copy.swapContentBit(size_ - 1); + + return copy; } InfoHash hash() const { @@ -88,20 +106,6 @@ struct Prefix { return InfoHash::get(copy); } - std::string toString() const { - std::stringstream ss; - - ss << "Prefix : " << std::endl << "\tContent_ : "; - ss << blobToString(content_); - ss << std::endl; - - ss << "\tFlags_ : "; - ss << blobToString(flags_); - ss << std::endl; - - return ss.str(); - } - /** * This method count total of bit in common between 2 prefix * @@ -138,64 +142,63 @@ struct Prefix { } /** - * This method swap the bit a the position 'bit' - * - * @param bit Position of the bit to swap - * @return The prefix with the bit at position 'bit' swapped - * @throw out_of_range Throw out of range if bit does not exist + * @see doc of swapBit private function */ - Prefix swapBit(size_t bit) const { - if ( bit >= content_.size() * 8 ) - throw std::out_of_range("bit larger than prefix size."); + void swapContentBit(size_t bit) { + swapBit(content_, bit); } + /** + * @see doc of swapBit private function + */ void swapFlagBit(size_t bit) { swapBit(flags_, bit); } + /** + * @see doc of addPadding private function + */ void addPaddingContent(size_t size) { content_ = addPadding(content_, size); } void updateFlags() { - // Fill first known bit + /* Fill first known bits */ auto csize = size_ - flags_.size() * 8; while(csize >= 8) { flags_.push_back(0xFF); csize -= 8; } - // if needed fill remaining bit + /* If needed, fill the remaining bits */ if ( csize ) flags_.push_back(0xFF << (8 - csize)); - // Complet vector space missing + /* Pad flags_ with 0xFF up to the size of content_ */ for ( auto i = flags_.size(); i < content_.size(); i++ ) flags_.push_back(0xFF); } + std::string toString() const; + size_t size_ {0}; + /* Will contain flags according to content_. 
+ If flags_[i] == 0, then content_[i] is unknown + else if flags_[i] == 1, then content_[i] is known */ Blob flags_ {}; Blob content_ {}; private: - std::string blobToString(const Blob &bl) const { - std::stringstream ss; - - auto bn = size_ % 8; - auto n = size_ / 8; - - for (size_t i = 0; i < bl.size(); i++) - ss << std::bitset<8>(bl[i]) << " "; - if (bn) - for (unsigned b=0; b < bn; b++) - ss << (char)((bl[n] & (1 << (7 - b))) ? '1':'0'); - - return ss.str(); - } - + /** + * Add a padding to the input blob + * + * @param toP : Blob where to add a padding + * @param size : Final size of the blob with padding + * + * @return Copy of the input Blob but with a padding + */ Blob addPadding(Blob toP, size_t size) { Blob copy = toP; for ( auto i = copy.size(); i < size; i++ ) @@ -205,13 +208,33 @@ private: return copy; } + /** + * Check if the bit at pos 'pos' is active, i.e. equal to 1 + * + * @param b : Blob to check + * @param pos : Position to check + * + * @return true if the bit is equal to 1, false otherwise + * + * @throw out_of_range if pos is not below content_.size() * 8 + */ bool isActiveBit(const Blob &b, size_t pos) const { - if ( pos >= size_ ) + if ( pos >= content_.size() * 8 ) throw std::out_of_range("Can't detect active bit at pos, pos larger than prefix size or empty prefix"); return ((b[pos / 8] >> (7 - (pos % 8)) ) & 1) == 1; } + /** + * Swap bit at position bit [from 0 to 1 and vice-versa] + * + * @param b : Blob to swap + * @param bit : Bit to swap on b + * + * @return nothing (void); b is taken by non-const reference + * + * @throw out_of_range if bit is not below b.size() * 8 + */ void swapBit(Blob &b, size_t bit) { if ( bit >= b.size() * 8 ) throw std::out_of_range("bit larger than prefix size."); @@ -257,6 +280,7 @@ public: /* A key for a an index entry */ using Key = std::map<std::string, Blob>; + /* Specifications of the keys. It defines the number, the length and the * serialization order of fields. 
*/ using KeySpec = std::map<std::string, size_t>; @@ -395,6 +419,13 @@ private: std::shared_ptr<unsigned> max_common_prefix_len, int start = -1, bool all_values = false); + /** + * Apply the zcurve algorithm on the list of input prefixes + * + * @param all_prefix : Vector of prefixes to interleave + * + * @return The output prefix where all flags and content are interleaved + */ Prefix zcurve(const std::vector<Prefix>& all_prefix) const; /** diff --git a/src/indexation/pht.cpp b/src/indexation/pht.cpp index 0fa380a8147f884accf08f6dec79ddff2f5fe70f..f3ef596acbe1793575ded663daffd3a522304b98 100644 --- a/src/indexation/pht.cpp +++ b/src/indexation/pht.cpp @@ -4,6 +4,41 @@ namespace dht { namespace indexation { +/** + * Render the blob as a readable string of binary digits + * + * @param bl : Blob to print + * + * @return each byte of bl as 8 binary digits followed by a space, then, if bl.size() % 8 is non-zero, that many leading bits of bl[bl.size() / 8] + */ +static std::string blobToString(const Blob &bl) { + std::stringstream ss; + auto bn = bl.size() % 8; + auto n = bl.size() / 8; + + for (size_t i = 0; i < bl.size(); i++) + ss << std::bitset<8>(bl[i]) << " "; + if (bn) + for (unsigned b=0; b < bn; b++) + ss << (char)((bl[n] & (1 << (7 - b))) ? '1':'0'); + + return ss.str(); +} + +std::string Prefix::toString() const { + std::stringstream ss; + + ss << "Prefix : " << std::endl << "\tContent_ : "; + ss << blobToString(content_); + ss << std::endl; + + ss << "\tFlags_ : "; + ss << blobToString(flags_); + ss << std::endl; + + return ss.str(); +} + void Pht::Cache::insert(const Prefix& p) { size_t i = 0; auto now = clock::now(); @@ -106,6 +141,7 @@ void Pht::lookupStep(Prefix p, std::shared_ptr<int> lo, std::shared_ptr<int> hi, /* start could be under 0 but after the compare it to 0 it always will be unsigned, so we can cast it*/ auto mid = (start >= 0) ? 
(unsigned) start : (*lo + *hi)/2; + auto first_res = std::make_shared<node_lookup_result>(); auto second_res = std::make_shared<node_lookup_result>(); @@ -117,6 +153,8 @@ void Pht::lookupStep(Prefix p, std::shared_ptr<int> lo, std::shared_ptr<int> hi, } else if (is_leaf or *lo > *hi) { // leaf node + Prefix to_insert = p.getPrefix(mid); + cache_.insert(to_insert); if (cb) { if (vals->size() == 0 and max_common_prefix_len and mid > 0) { @@ -126,7 +164,7 @@ void Pht::lookupStep(Prefix p, std::shared_ptr<int> lo, std::shared_ptr<int> hi, lookupStep(p_, lo, hi, vals, cb, done_cb, max_common_prefix_len, -1, all_values); } - cb(*vals, p.getPrefix(mid)); + cb(*vals, to_insert); } if (done_cb) @@ -291,10 +329,12 @@ void Pht::insert(Prefix kp, IndexEntry entry, std::shared_ptr<int> lo, std::shar if ( not check_split or final_prefix->size_ == kp.size_ ) { real_insert(final_prefix, std::move(entry)); } else { - if ( vals->size() < MAX_NODE_ENTRY_COUNT ) + if ( vals->size() < MAX_NODE_ENTRY_COUNT ) { getRealPrefix(final_prefix, std::move(entry), real_insert); - else + } + else { split(*final_prefix, vals, entry, real_insert); + } } } }, nullptr, cache_.lookup(kp), true @@ -307,27 +347,39 @@ Prefix Pht::zcurve(const std::vector<Prefix>& all_prefix) const { if ( all_prefix.size() == 1 ) return all_prefix[0]; - for ( size_t j = 0, bit = 0; j < all_prefix[0].content_.size(); j++) { + /* All prefixes have the same size (thanks to padding) */ + size_t prefix_size = all_prefix[0].content_.size(); + + /* Loop over every uint8_t of the input prefixes */ + for ( size_t j = 0, bit = 0; j < prefix_size; j++) { uint8_t mask = 0x80; + /* For each of the 8 bits of the input uint8_t */ for ( int i = 0; i < 8; ) { uint8_t flags = 0; uint8_t content = 0; - for ( int k = 0 ; k < 8; k++, bit++ ) { + /* For each bit of the output uint8_t */ + for ( int k = 0 ; k < 8; k++ ) { auto diff = k - i; - auto x = all_prefix[bit].content_[j] & mask; - auto y = all_prefix[bit].flags_[j] & mask; + /*get the content 
'c', and the flag 'f' of the input prefix */ + auto c = all_prefix[bit].content_[j] & mask; + auto f = all_prefix[bit].flags_[j] & mask; - content |= ( diff >= 0 ) ? x >> diff : x << std::abs(diff); - flags |= ( diff >= 0 ) ? y >> diff : y << std::abs(diff); + /* Move this bit to the right position according to the diff + and merge it into content and flags in the same way */ + content |= ( diff >= 0 ) ? c >> diff : c << std::abs(diff); + flags |= ( diff >= 0 ) ? f >> diff : f << std::abs(diff); - if ( bit == all_prefix.size() - 1 ) { bit = -1; ++i; mask >>= 1; } + /* If we are on the last prefix of the vector, get back to the first one and + move the mask in order to get the (n + 1)th bit */ + if ( ++bit == all_prefix.size() ) { bit = 0; ++i; mask >>= 1; } } + /* Add the next flags + content to the output prefix */ p.content_.push_back(content); p.flags_.push_back(flags); p.size_ += 8; @@ -343,6 +395,7 @@ Prefix Pht::linearize(Key k) const { std::vector<Prefix> all_prefix; all_prefix.reserve(k.size()); + /* Get the largest field size of the keyspec and use it as the size limit (for padding) */ auto max = std::max_element(keySpec_.begin(), keySpec_.end(), [](const std::pair<std::string, size_t>& a, const std::pair<std::string, size_t>& b) { return a.second < b.second; @@ -444,10 +497,8 @@ void Pht::split(Prefix insert, std::shared_ptr<std::vector<std::shared_ptr<Index auto loc = foundSplitLocation(full, vals); auto prefix_to_insert = std::make_shared<Prefix>(full.getPrefix(loc)); - std::cerr << " Split loc " << loc << " full" << full.toString() << " size " << full.size_ << std::endl; for(;loc != insert.size_ - 1; loc--) { - std::cerr << "loc " << full.getPrefix(loc).toString() << std::endl; updateCanary(full.getPrefix(loc)); } diff --git a/tools/dhtnode.cpp b/tools/dhtnode.cpp index e65247cd85efb6df5bd16ad5469907d6c27ee203..76cd1c4e061c0b7316adf928554f3f44c35d6a20 100644 --- a/tools/dhtnode.cpp +++ b/tools/dhtnode.cpp @@ -284,7 +284,7 @@ void cmd_loop(std::shared_ptr<DhtRunner>& 
dht, std::map<std::string, indexation: if (vals.empty()) return; std::cout << "Pht::lookup: found entries!" << std::endl - << " prefix: \"" << p.toString() << "\"" << std::endl + << p.toString() << std::endl << " hash: " << p.hash() << std::endl; std::cout << " entries:" << std::endl; for (auto v : vals)