// Source listing of this header; see the generated documentation of this file for the API description.
00002
00003
00004 #ifndef IBIS_KEYWORDS_H
00005 #define IBIS_KEYWORDS_H
00006
00007
00008
00009
00010
00011 #include "index.h"
00012 #include "category.h"
00013
00078 class ibis::keywords : public ibis::index {
00079 public:
00080 virtual ~keywords() {clear();}
00081 explicit keywords(const ibis::column* c, const char* f=0);
00082 keywords(const ibis::column* c, ibis::text::tokenizer& tkn,
00083 const char* f=0);
00084 keywords(const ibis::column* c, ibis::fileManager::storage* st);
00085
00086 virtual INDEX_TYPE type() const {return KEYWORDS;}
00087 virtual const char* name() const {return "keywords";}
00088 virtual void binBoundaries(std::vector<double>& b) const {b.clear();}
00089 virtual void binWeights(std::vector<uint32_t>& b) const;
00090 virtual double getMin() const {return DBL_MAX;}
00091 virtual double getMax() const {return -DBL_MAX;}
00092 virtual double getSum() const {return -DBL_MAX;}
00094 long search(const char* kw, ibis::bitvector& hits) const;
00096 long search(const char* kw) const;
00097
00098 virtual void print(std::ostream& out) const;
00099 virtual int write(const char* dt) const;
00100 virtual int read(const char* idxfile);
00101 virtual int read(ibis::fileManager::storage* st);
00102 virtual long append(const char* dt, const char* df, uint32_t nnew);
00103
00104 using ibis::index::evaluate;
00105 using ibis::index::estimate;
00106 using ibis::index::undecidable;
00107 virtual long evaluate(const ibis::qContinuousRange& expr,
00108 ibis::bitvector& hits) const;
00109 virtual void estimate(const ibis::qContinuousRange& expr,
00110 ibis::bitvector& lower,
00111 ibis::bitvector& upper) const;
00112 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00115 virtual float undecidable(const ibis::qContinuousRange&,
00116 ibis::bitvector& iffy) const {
00117 iffy.clear();
00118 return 0.0;
00119 }
00120 virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00121 virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00122
00123 class tokenizer;
00124
00125 protected:
00126 virtual size_t getSerialSize() const throw();
00127 int readTermDocFile(const ibis::column* idcol, const char* f);
00128 inline char readTerm(const char*& buf, std::string &key) const;
00129 inline uint32_t readUInt(const char*& buf) const;
00130 int readTDLine(std::istream& in, std::string& key,
00131 std::vector<uint32_t>& idlist,
00132 char* buf, uint32_t nbuf) const;
00133 void setBits(std::vector<uint32_t>& pos, ibis::bitvector& bvec) const;
00134 int parseTextFile(ibis::text::tokenizer &tkn, const char *f);
00135
00137 void clear();
00138
00139 private:
00140 ibis::dictionary terms;
00141 };
00142
00148 inline char ibis::keywords::readTerm(const char*& buf,
00149 std::string &keyword) const {
00150 while (isspace(*buf))
00151 ++ buf;
00152 while (isprint(*buf)) {
00153 if (*buf == ':') {
00154 return *buf;
00155 }
00156 else if (isspace(*buf)) {
00157 for (++ buf; isspace(*buf); ++ buf);
00158 if (*buf == ':') {
00159 return *buf;
00160 }
00161 else {
00162 keyword += ' ';
00163 keyword += *buf;
00164 ++ buf;
00165 }
00166 }
00167 else {
00168 keyword += *buf;
00169 ++ buf;
00170 }
00171 }
00172 return *buf;
00173 }
00174
00176 inline uint32_t ibis::keywords::readUInt(const char*& buf) const {
00177 uint32_t res = 0;
00178 while (*buf && ! isdigit(*buf))
00179 ++ buf;
00180
00181 while (isdigit(*buf)) {
00182 res = res * 10 + (*buf - '0');
00183 ++ buf;
00184 }
00185 return res;
00186 }
00187
00189 class ibis::keywords::tokenizer : public ibis::text::tokenizer {
00190 public:
00196 tokenizer(const char *d=ibis::util::delimiters) : delim_(d) {}
00198 virtual ~tokenizer() {}
00199
00200 virtual int operator()(std::vector<const char*>& tkns, char *buf);
00201
00202 private:
00203 std::string delim_;
00204 };
00205 #endif