28 || this->options.min_octave > this->options.max_octave)
29 throw std::invalid_argument(
"Invalid octave range");
53 std::cout <<
"SIFT: Creating "
55 <<
" octaves (" << this->options.
min_octave <<
" to "
56 << this->options.max_octave <<
")..." << std::endl;
62 std::cout <<
"SIFT: Creating octaves took "
71 std::cout <<
"SIFT: Detecting local extrema..." << std::endl;
77 std::cout <<
"SIFT: Detected " << this->keypoints.size()
78 <<
" keypoints, took " << timer.
get_elapsed() <<
"ms." << std::endl;
87 std::cout <<
"SIFT: Localizing and filtering keypoints..." << std::endl;
93 std::cout <<
"SIFT: Retained " << this->keypoints.size() <<
" stable "
94 <<
"keypoints, took " << timer.
get_elapsed() <<
"ms." << std::endl;
100 for (std::size_t i = 0; i < this->octaves.size(); ++i)
101 this->octaves[i].dog.clear();
111 std::cout <<
"SIFT: Generating keypoint descriptors..." << std::endl;
117 std::cout <<
"SIFT: Generated " << this->descriptors.size()
118 <<
" descriptors, took " << timer.
get_elapsed() <<
"ms."
123 std::cout <<
"SIFT: Generated " << this->descriptors.size()
124 <<
" descriptors from " << this->keypoints.size() <<
" keypoints,"
125 <<
" took " << total_timer.
get_elapsed() <<
"ms." << std::endl;
129 this->octaves.clear();
137 if (img->channels() != 1 && img->channels() != 3)
138 throw std::invalid_argument(
"Gray or color image expected");
141 if (img->channels() == 3)
143 this->orig = mve::image::desaturate<float>
153 if (img->channels() != 1 && img->channels() != 3)
154 throw std::invalid_argument(
"Gray or color image expected");
156 if (img->channels() == 3)
158 this->orig = mve::image::desaturate<float>
163 this->orig = img->duplicate();
172 this->octaves.clear();
182 = mve::image::rescale_double_size_supersample<float>(this->orig);
184 this->options.base_blur_sigma);
192 for (
int i = 0; i < this->options.
min_octave; ++i)
193 img = mve::image::rescale_half_size_gaussian<float>(img);
200 for (
int i = std::max(0, this->options.
min_octave);
205 img = mve::image::rescale_half_size_gaussian<float>(img);
214 float has_sigma,
float target_sigma)
225 ? mve::image::blur_gaussian<float>(image, sigma)
226 : image->duplicate());
229 this->octaves.push_back(
Octave());
230 Octave& oct = this->octaves.back();
231 oct.
img.push_back(base);
235 sigma = target_sigma;
241 float sigmak = sigma * k;
249 oct.
img.push_back(img);
253 oct.
dog.push_back(dog);
267 this->keypoints.clear();
270 for (std::size_t i = 0; i < this->octaves.size(); ++i)
272 Octave const& oct(this->octaves[i]);
274 for (
int s = 0; s < (int)oct.
dog.size() - 2; ++s)
277 { oct.
dog[s + 0], oct.
dog[s + 1], oct.
dog[s + 2] };
289 int const w = s[1]->width();
290 int const h = s[1]->height();
293 int noff[9] = { -1 - w, 0 - w, 1 - w, -1, 0, 1, -1 + w, 0 + w, 1 + w };
301 for (
int y = 1; y < h - 1; ++y, off += w)
302 for (
int x = 1; x < w - 1; ++x)
307 bool smallest =
true;
308 float center_value = s[1]->at(idx);
309 for (
int l = 0; (largest || smallest) && l < 3; ++l)
310 for (
int i = 0; (largest || smallest) && i < 9; ++i)
312 if (l == 1 && i == 4)
314 if (s[l]->at(idx + noff[i]) >= center_value)
316 if (s[l]->at(idx + noff[i]) <= center_value)
321 if (!smallest && !largest)
327 kp.
x =
static_cast<float>(x);
328 kp.
y =
static_cast<float>(y);
329 kp.
sample =
static_cast<float>(si);
330 this->keypoints.push_back(kp);
348 int num_singular = 0;
349 int num_keypoints = 0;
350 for (std::size_t i = 0; i < this->keypoints.size(); ++i)
356 Octave const& oct(this->octaves[kp.
octave - this->options.min_octave]);
357 int sample =
static_cast<int>(kp.
sample);
359 { oct.
dog[sample + 0], oct.
dog[sample + 1], oct.
dog[sample + 2] };
362 int const w = dogs[0]->width();
363 int const h = dogs[0]->height();
365 int ix =
static_cast<int>(kp.
x);
366 int iy =
static_cast<int>(kp.
y);
367 int is =
static_cast<int>(kp.
sample);
379# define AT(S,OFF) (dogs[S]->at(px + OFF))
380 for (
int j = 0; j < 5; ++j)
382 std::size_t px = iy * w + ix;
385 Dx = (
AT(1,1) -
AT(1,-1)) * 0.5f;
386 Dy = (
AT(1,w) -
AT(1,-w)) * 0.5f;
387 Ds = (
AT(2,0) -
AT(0,0)) * 0.5f;
389 Dxx =
AT(1,1) +
AT(1,-1) - 2.0f *
AT(1,0);
390 Dyy =
AT(1,w) +
AT(1,-w) - 2.0f *
AT(1,0);
391 Dss =
AT(2,0) +
AT(0,0) - 2.0f *
AT(1,0);
393 Dxy = (
AT(1,1+w) +
AT(1,-1-w) -
AT(1,-1+w) -
AT(1,1-w)) * 0.25f;
394 Dxs = (
AT(2,1) +
AT(0,-1) -
AT(2,-1) -
AT(0,1)) * 0.25f;
395 Dys = (
AT(2,w) +
AT(0,-w) -
AT(2,-w) -
AT(0,w)) * 0.25f;
399 A[0] = Dxx; A[1] = Dxy; A[2] = Dxs;
400 A[3] = Dxy; A[4] = Dyy; A[5] = Dys;
401 A[6] = Dxs; A[7] = Dys; A[8] = Dss;
416 fx = b[0]; fy = b[1]; fs = b[2];
419 int dx = (fx > 0.6f && ix < w-2) * 1 + (fx < -0.6f && ix > 1) * -1;
420 int dy = (fy > 0.6f && iy < h-2) * 1 + (fy < -0.6f && iy > 1) * -1;
424 if (dx != 0 || dy != 0)
436 float val = dogs[1]->at(ix, iy, 0) + 0.5f * (Dx * fx + Dy * fy + Ds * fs);
439 float hessian_trace = Dxx + Dyy;
440 float hessian_det = Dxx * Dyy -
MATH_POW2(Dxy);
441 float hessian_score =
MATH_POW2(hessian_trace) / hessian_det;
448 kp.
x = (float)ix + fx;
449 kp.
y = (float)iy + fy;
450 kp.
sample = (float)is + fs;
462 || hessian_score < 0.0f || hessian_score > score_thres
463 || std::abs(fx) > 1.5f || std::abs(fy) > 1.5f || std::abs(fs) > 1.0f
466 || kp.
x < 0.0f || kp.
x > (
float)(w - 1)
467 || kp.
y < 0.0f || kp.
y > (
float)(h - 1))
474 this->keypoints[num_keypoints] = kp;
479 this->keypoints.resize(num_keypoints);
483 std::cout <<
"SIFT: Warning: " << num_singular
484 <<
" singular matrices detected!" << std::endl;
493 if (this->octaves.empty())
494 throw std::runtime_error(
"Octaves not available!");
495 if (this->keypoints.empty())
498 this->descriptors.clear();
499 this->descriptors.reserve(this->keypoints.size() * 3 / 2);
507 int octave_index = this->keypoints[0].octave;
512 for (std::size_t i = 0; i < this->keypoints.size(); ++i)
514 Keypoint const& kp(this->keypoints[i]);
517 if (kp.
octave > octave_index)
522 octave->
grad.clear();
527 octave = &this->octaves[octave_index - this->options.
min_octave];
530 else if (kp.
octave < octave_index)
532 throw std::runtime_error(
"Decreasing octave index!");
536 std::vector<float> orientations;
537 orientations.reserve(8);
541 for (std::size_t j = 0; j < orientations.size(); ++j)
544 float const scale_factor = std::pow(2.0f, kp.
octave);
545 desc.
x = scale_factor * (kp.
x + 0.5f) - 0.5f;
546 desc.
y = scale_factor * (kp.
y + 0.5f) - 0.5f;
550 this->descriptors.push_back(desc);
560 octave->
grad.clear();
561 octave->
grad.reserve(octave->
img.size());
563 octave->
ori.reserve(octave->
img.size());
565 int const width = octave->
img[0]->width();
566 int const height = octave->
img[0]->height();
569 for (std::size_t i = 0; i < octave->
img.size(); ++i)
575 int image_iter = width + 1;
576 for (
int y = 1; y < height - 1; ++y, image_iter += 2)
577 for (
int x = 1; x < width - 1; ++x, ++image_iter)
579 float m1x = img->at(image_iter - 1);
580 float p1x = img->at(image_iter + 1);
581 float m1y = img->at(image_iter - width);
582 float p1y = img->at(image_iter + width);
583 float dx = 0.5f * (p1x - m1x);
584 float dy = 0.5f * (p1y - m1y);
586 float atan2f = std::atan2(dy, dx);
587 grad->at(image_iter) = std::sqrt(dx * dx + dy * dy);
588 ori->at(image_iter) = atan2f < 0.0f
589 ? atan2f +
MATH_PI * 2.0f : atan2f;
591 octave->
grad.push_back(grad);
592 octave->
ori.push_back(ori);
600 Octave const* octave, std::vector<float>& orientations)
602 int const nbins = 36;
603 float const nbinsf =
static_cast<float>(nbins);
607 std::fill(hist, hist + nbins, 0.0f);
610 int const ix =
static_cast<int>(kp.
x + 0.5f);
611 int const iy =
static_cast<int>(kp.
y + 0.5f);
618 int const width = grad->width();
619 int const height = grad->height();
628 float const sigma_factor = 1.5f;
629 int win =
static_cast<int>(sigma * sigma_factor * 3.0f);
630 if (ix < win || ix + win >= width || iy < win || iy + win >= height)
634 int center = iy * width + ix;
635 float const dxf = kp.
x -
static_cast<float>(ix);
636 float const dyf = kp.
y -
static_cast<float>(iy);
637 float const maxdist =
static_cast<float>(win*win) + 0.5f;
640 for (
int dy = -win; dy <= win; ++dy)
642 int const yoff = dy * width;
643 for (
int dx = -win; dx <= win; ++dx)
650 float gm = grad->at(center + yoff + dx);
651 float go = ori->at(center + yoff + dx);
653 int bin =
static_cast<int>(nbinsf * go / (2.0f *
MATH_PI));
655 hist[bin] += gm * weight;
660 for (
int i = 0; i < 6; ++i)
662 float first = hist[0];
663 float prev = hist[nbins - 1];
664 for (
int j = 0; j < nbins - 1; ++j)
666 float current = hist[j];
667 hist[j] = (prev + current + hist[j + 1]) / 3.0f;
670 hist[nbins - 1] = (prev + hist[nbins - 1] +
first) / 3.0f;
674 float maxh = *std::max_element(hist, hist + nbins);
677 for (
int i = 0; i < nbins; ++i)
679 float h0 = hist[(i + nbins - 1) % nbins];
681 float h2 = hist[(i + 1) % nbins];
684 if (h1 <= 0.8f * maxh || h1 <= h0 || h1 <= h2)
693 float x = -0.5f * (h2 - h0) / (h0 - 2.0f * h1 + h2);
694 float o = 2.0f *
MATH_PI * (x + (float)i + 0.5f) / nbinsf;
695 orientations.push_back(o);
715 int const ix =
static_cast<int>(kp.
x + 0.5f);
716 int const iy =
static_cast<int>(kp.
y + 0.5f);
718 float const dxf = kp.
x -
static_cast<float>(ix);
719 float const dyf = kp.
y -
static_cast<float>(iy);
725 int const width = grad->width();
726 int const height = grad->height();
743 float const binsize = 3.0f * sigma;
744 int win =
MATH_SQRT2 * binsize * (float)(PXB + 1) * 0.5f;
745 if (ix < win || ix + win >= width || iy < win || iy + win >= height)
754 int const center = iy * width + ix;
755 for (
int dy = -win; dy <= win; ++dy)
757 int const yoff = dy * width;
758 for (
int dx = -win; dx <= win; ++dx)
761 float const mod = grad->at(center + yoff + dx);
762 float const angle = ori->at(center + yoff + dx);
768 float const winx = (float)dx - dxf;
769 float const winy = (float)dy - dyf;
778 float binoff = (float)(PXB - 1) / 2.0f;
779 float binx = (coso * winx + sino * winy) / binsize + binoff;
780 float biny = (-sino * winx + coso * winy) / binsize + binoff;
781 float bint = theta * (float)OHB / (2.0f *
MATH_PI) - 0.5f;
784 float gaussian_sigma = 0.5f * (float)PXB;
790 float contrib = mod * gaussian_weight;
797 int bxi[2] = { (int)std::floor(binx), (int)std::floor(binx) + 1 };
798 int byi[2] = { (int)std::floor(biny), (int)std::floor(biny) + 1 };
799 int bti[2] = { (int)std::floor(bint), (int)std::floor(bint) + 1 };
801 float weights[3][2] = {
802 { (float)bxi[1] - binx, 1.0f - ((
float)bxi[1] - binx) },
803 { (float)byi[1] - biny, 1.0f - ((
float)byi[1] - biny) },
804 { (float)bti[1] - bint, 1.0f - ((
float)bti[1] - bint) }
814 int const xstride = OHB;
815 int const ystride = OHB * PXB;
816 for (
int y = 0; y < 2; ++y)
817 for (
int x = 0; x < 2; ++x)
818 for (
int t = 0; t < 2; ++t)
820 if (bxi[x] < 0 || bxi[x] >= PXB
821 || byi[y] < 0 || byi[y] >= PXB)
824 int idx = bti[t] + bxi[x] * xstride + byi[y] * ystride;
825 desc.
data[idx] += contrib * weights[0][x]
826 * weights[1][y] * weights[2][t];
835 for (
int i = 0; i < PXB * PXB * OHB; ++i)
836 desc.
data[i] = std::min(desc.
data[i], 0.2f);
858 (kp.
sample + 1.0f) / this->options.num_samples_per_octave);
865 kp.
octave + (kp.
sample + 1.0f) / this->options.num_samples_per_octave);
873 std::ifstream in(filename.c_str());
875 throw std::runtime_error(
"Cannot open descriptor file");
879 in >> num_descriptors >> num_dimensions;
880 if (num_descriptors > 100000 || num_dimensions != 128)
883 throw std::runtime_error(
"Invalid number of descriptors/dimensions");
886 result->reserve(num_descriptors);
887 for (
int i = 0; i < num_descriptors; ++i)
890 in >> descriptor.
y >> descriptor.
x
892 for (
int j = 0; j < 128; ++j)
893 in >> descriptor.
data[j];
895 result->push_back(descriptor);
902 throw std::runtime_error(
"Error while reading descriptors");
Matrix class for arbitrary dimensions and types.
Vector class for arbitrary dimensions and types.
Vector< T, N > & fill(T const &value)
Fills all vector elements with the given value.
Vector< T, N > & normalize(void)
Normalizes self and returns reference to self.
std::shared_ptr< Image< T > > Ptr
std::shared_ptr< Image< T > const > ConstPtr
static Ptr create(void)
Smart pointer image constructor.
void orientation_assignment(Keypoint const &kp, Octave const *octave, std::vector< float > &orientations)
void set_image(mve::ByteImage::ConstPtr img)
Sets the input image.
void keypoint_localization(void)
void descriptor_generation(void)
void process(void)
Starts the SIFT keypoint detection and descriptor extraction.
float keypoint_absolute_scale(Keypoint const &kp)
void extrema_detection(void)
void set_float_image(mve::FloatImage::ConstPtr img)
Sets the input image.
static void load_lowe_descriptors(std::string const &filename, Descriptors *result)
Helper function that creates SIFT descriptors from David Lowe's SIFT descriptor files.
void create_octaves(void)
std::vector< Descriptor > Descriptors
void generate_grad_ori_images(Octave *octave)
bool descriptor_assignment(Keypoint const &kp, Descriptor &desc, Octave const *octave)
void add_octave(mve::FloatImage::ConstPtr image, float has_sigma, float target_sigma)
float keypoint_relative_scale(Keypoint const &kp)
Simple timer class to take execution times.
std::size_t get_elapsed(void) const
#define MATH_EPSILON_EQ(x, v, eps)
T const & clamp(T const &v, T const &min=T(0), T const &max=T(1))
Returns value 'v' clamped to the interval specified by 'min' and 'max'.
T round(T const &x)
Removes the fractional part of the value to the closest integer.
Matrix< T, N, N > matrix_inverse(Matrix< T, N, N > const &mat)
Calculates the inverse of the given matrix.
T matrix_determinant(Matrix< T, N, N > const &mat)
Calculates the determinant of the given matrix.
T gaussian_xx(T const &xx, T const &sigma)
Gaussian function that expects x to be squared.
FloatImage::Ptr byte_to_float_image(ByteImage::ConstPtr image)
Converts a given byte image to a float image.
@ DESATURATE_AVERAGE
(R + G + B) * 1/3
#define SFM_NAMESPACE_END
#define SFM_NAMESPACE_BEGIN
Representation of the SIFT descriptor.
math::Vector< float, 128 > data
The descriptor data, elements are unsigned in [0.0, 1.0].
float scale
The scale (or sigma value) of the keypoint.
float orientation
The orientation of the image keypoint in [0, 2PI].
float x
The sub-pixel x-coordinate of the image keypoint.
float y
The sub-pixel y-coordinate of the image keypoint.
Representation of a SIFT keypoint.
float x
Keypoint x-coordinate.
float y
Keypoint y-coordinate.
int octave
Octave index of the keypoint.
float sample
Sample index.
Representation of a SIFT octave.
ImageVector grad
S+3 gradient images.
ImageVector dog
S+2 difference of gaussian images.
ImageVector ori
S+3 orientation images.
ImageVector img
S+3 images per octave.
float edge_ratio_threshold
Sets the edge threshold to eliminate edge responses.
bool debug_output
Produce even more messages on the console.
float base_blur_sigma
Sets the amount of desired base blur before constructing the octaves.
bool verbose_output
Produce status messages on the console.
float inherent_blur_sigma
Sets the inherent blur sigma in the input image.
float contrast_threshold
Sets contrast threshold, i.e.
int max_octave
Sets the maximum octave.
int num_samples_per_octave
Sets the amount of samples per octave.
int min_octave
Sets the minimum octave ID.