12 #ifndef __RD_SPARSE_INT_VECT_20070921__
13 #define __RD_SPARSE_INT_VECT_20070921__
27 template <
typename IndexType>
39 d_length = other.d_length;
40 d_data.insert(other.d_data.begin(), other.d_data.end());
45 initFromText(pkl.c_str(), pkl.size());
49 initFromText(pkl, len);
56 d_length = other.d_length;
57 d_data.insert(other.d_data.begin(), other.d_data.end());
65 #pragma clang diagnostic push
66 #pragma clang diagnostic ignored "-Wtautological-compare"
67 #elif (defined(__GNUC__) || defined(__GNUG__)) && \
68 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 1))
69 #if (__GNUC__ > 4 || __GNUC_MINOR__ > 5)
70 #pragma GCC diagnostic push
72 #pragma GCC diagnostic ignored "-Wtype-limits"
76 if (idx < 0 || idx >= d_length) {
80 typename StorageType::const_iterator iter = d_data.find(idx);
81 if (iter != d_data.end()) {
88 void setVal(IndexType idx,
int val) {
89 if (idx < 0 || idx >= d_length) {
99 #pragma clang diagnostic pop
100 #elif (defined(__GNUC__) || defined(__GNUG__)) && \
101 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5))
102 #pragma GCC diagnostic pop
114 typename StorageType::const_iterator iter;
115 for (iter = d_data.begin(); iter != d_data.end(); ++iter) {
119 res += abs(iter->second);
133 if (other.d_length != d_length) {
137 typename StorageType::iterator iter = d_data.begin();
138 typename StorageType::const_iterator oIter = other.d_data.begin();
139 while (iter != d_data.end()) {
141 while (oIter != other.d_data.end() && oIter->first < iter->first) {
144 if (oIter != other.d_data.end() && oIter->first == iter->first) {
146 if (oIter->second < iter->second) {
147 iter->second = oIter->second;
154 typename StorageType::iterator tmpIter = iter;
172 if (other.d_length != d_length) {
176 typename StorageType::iterator iter = d_data.begin();
177 typename StorageType::const_iterator oIter = other.d_data.begin();
178 while (iter != d_data.end()) {
180 while (oIter != other.d_data.end() && oIter->first < iter->first) {
181 d_data[oIter->first] = oIter->second;
184 if (oIter != other.d_data.end() && oIter->first == iter->first) {
186 if (oIter->second > iter->second) {
187 iter->second = oIter->second;
194 while (oIter != other.d_data.end()) {
195 d_data[oIter->first] = oIter->second;
207 if (other.d_length != d_length) {
210 typename StorageType::iterator iter = d_data.begin();
211 typename StorageType::const_iterator oIter = other.d_data.begin();
212 while (oIter != other.d_data.end()) {
213 while (iter != d_data.end() && iter->first < oIter->first) {
216 if (iter != d_data.end() && oIter->first == iter->first) {
218 iter->second += oIter->second;
220 typename StorageType::iterator tIter = iter;
228 d_data[oIter->first] = oIter->second;
241 if (other.d_length != d_length) {
244 typename StorageType::iterator iter = d_data.begin();
245 typename StorageType::const_iterator oIter = other.d_data.begin();
246 while (oIter != other.d_data.end()) {
247 while (iter != d_data.end() && iter->first < oIter->first) {
250 if (iter != d_data.end() && oIter->first == iter->first) {
252 iter->second -= oIter->second;
254 typename StorageType::iterator tIter = iter;
262 d_data[oIter->first] = -oIter->second;
274 typename StorageType::iterator iter = d_data.begin();
275 while (iter != d_data.end()) {
286 typename StorageType::iterator iter = d_data.begin();
287 while (iter != d_data.end()) {
298 typename StorageType::iterator iter = d_data.begin();
299 while (iter != d_data.end()) {
310 typename StorageType::iterator iter = d_data.begin();
311 while (iter != d_data.end()) {
323 if (d_length != v2.d_length) {
326 return d_data == v2.d_data;
329 return !(*
this == v2);
334 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
339 tInt =
sizeof(IndexType);
342 IndexType nEntries = d_data.size();
345 typename StorageType::const_iterator iter = d_data.begin();
346 while (iter != d_data.end()) {
348 std::int32_t tInt = iter->second;
356 initFromText(txt.c_str(), txt.length());
363 void initFromText(
const char *pkl,
const unsigned int len) {
365 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
371 if (vers == 0x0001) {
374 if (tInt >
sizeof(IndexType)) {
376 "IndexType cannot accommodate index size in SparseIntVect pickle");
380 readVals<unsigned char>(ss);
382 case sizeof(std::int32_t):
383 readVals<std::uint32_t>(ss);
385 case sizeof(boost::int64_t):
386 readVals<boost::uint64_t>(ss);
395 template <
typename T>
396 void readVals(std::stringstream &ss) {
397 PRECONDITION(
sizeof(T) <=
sizeof(IndexType),
"invalid size");
403 for (T i = 0; i < nEntries; ++i) {
412 template <
typename IndexType,
typename SequenceType>
414 const SequenceType &seq) {
415 typename SequenceType::const_iterator seqIt;
416 for (seqIt = seq.begin(); seqIt != seq.end(); ++seqIt) {
418 IndexType idx = *seqIt;
424 template <
typename IndexType>
425 void calcVectParams(
const SparseIntVect<IndexType> &v1,
426 const SparseIntVect<IndexType> &v2,
double &v1Sum,
427 double &v2Sum,
double &andSum) {
428 if (v1.getLength() != v2.getLength()) {
431 v1Sum = v2Sum = andSum = 0.0;
434 typename SparseIntVect<IndexType>::StorageType::const_iterator iter1, iter2;
435 iter1 = v1.getNonzeroElements().begin();
436 if (iter1 != v1.getNonzeroElements().end()) v1Sum += abs(iter1->second);
437 iter2 = v2.getNonzeroElements().begin();
438 if (iter2 != v2.getNonzeroElements().end()) v2Sum += abs(iter2->second);
439 while (iter1 != v1.getNonzeroElements().end()) {
440 while (iter2 != v2.getNonzeroElements().end() &&
441 iter2->first < iter1->first) {
443 if (iter2 != v2.getNonzeroElements().end()) v2Sum += abs(iter2->second);
445 if (iter2 != v2.getNonzeroElements().end()) {
446 if (iter2->first == iter1->first) {
447 if (abs(iter2->second) < abs(iter1->second)) {
448 andSum += abs(iter2->second);
450 andSum += abs(iter1->second);
453 if (iter2 != v2.getNonzeroElements().end()) v2Sum += abs(iter2->second);
456 if (iter1 != v1.getNonzeroElements().end()) v1Sum += abs(iter1->second);
461 if (iter1 != v1.getNonzeroElements().end()) {
463 while (iter1 != v1.getNonzeroElements().end()) {
464 v1Sum += abs(iter1->second);
468 if (iter2 != v2.getNonzeroElements().end()) {
470 while (iter2 != v2.getNonzeroElements().end()) {
471 v2Sum += abs(iter2->second);
478 template <
typename IndexType>
481 bool returnDistance =
false,
double bounds = 0.0) {
487 if (!returnDistance && bounds > 0.0) {
490 double denom = v1Sum + v2Sum;
491 if (fabs(denom) < 1e-6) {
495 double minV = v1Sum < v2Sum ? v1Sum : v2Sum;
496 if (2. * minV / denom < bounds) {
505 calcVectParams(v1, v2, v1Sum, v2Sum, numer);
507 double denom = v1Sum + v2Sum;
509 if (fabs(denom) < 1e-6) {
512 sim = 2. * numer / denom;
514 if (returnDistance) sim = 1. - sim;
519 template <
typename IndexType>
522 bool returnDistance =
false,
double bounds = 0.0) {
531 calcVectParams(v1, v2, v1Sum, v2Sum, andSum);
533 double denom = a * v1Sum + b * v2Sum + (1 - a - b) * andSum;
536 if (fabs(denom) < 1e-6) {
539 sim = andSum / denom;
541 if (returnDistance) sim = 1. - sim;
546 template <
typename IndexType>
549 bool returnDistance =
false,
double bounds = 0.0) {
#define RDUNUSED_PARAM(x)
#define PRECONDITION(expr, mess)
const int ci_SPARSEINTVECT_VERSION
version number to use in pickles
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
a class for efficiently storing sparse vectors of ints
SparseIntVect< IndexType > & operator+=(int v)
SparseIntVect< IndexType > & operator/(int v)
SparseIntVect(IndexType length)
initialize with a particular length
unsigned int size() const
returns the length
const SparseIntVect< IndexType > operator+(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator*(int v)
SparseIntVect< IndexType > & operator+=(const SparseIntVect< IndexType > &other)
bool operator==(const SparseIntVect< IndexType > &v2) const
SparseIntVect(const SparseIntVect< IndexType > &other)
Copy constructor.
SparseIntVect< IndexType > & operator|=(const SparseIntVect< IndexType > &other)
const SparseIntVect< IndexType > operator-(const SparseIntVect< IndexType > &other) const
const SparseIntVect< IndexType > operator|(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator/=(int v)
const SparseIntVect< IndexType > operator&(const SparseIntVect< IndexType > &other) const
SparseIntVect(const char *pkl, const unsigned int len)
constructor from a pickle
int operator[](IndexType idx) const
support indexing using []
void fromString(const std::string &txt)
SparseIntVect< IndexType > & operator*=(int v)
void setVal(IndexType idx, int val)
set the value at an index
SparseIntVect< IndexType > & operator&=(const SparseIntVect< IndexType > &other)
SparseIntVect & operator=(const SparseIntVect< IndexType > &other)
std::string toString() const
returns a binary string representation (pickle)
int getTotalVal(bool doAbs=false) const
SparseIntVect< IndexType > & operator-(int v)
std::map< IndexType, int > StorageType
SparseIntVect< IndexType > & operator-=(const SparseIntVect< IndexType > &other)
bool operator!=(const SparseIntVect< IndexType > &v2) const
SparseIntVect< IndexType > & operator-=(int v)
SparseIntVect(const std::string &pkl)
constructor from a pickle
~SparseIntVect()
destructor (doesn't need to do anything)
SparseIntVect< IndexType > & operator+(int v)
IndexType getLength() const
returns the length
int getVal(IndexType idx) const
return the value at an index
const StorageType & getNonzeroElements() const
returns our nonzero elements as a map(IndexType->int)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
double TverskySimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, double a, double b, bool returnDistance=false, double bounds=0.0)
void updateFromSequence(SparseIntVect< IndexType > &vect, const SequenceType &seq)
double TanimotoSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
double DiceSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream