Tonypythony
/
audiowmark


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
							/*
 * Copyright (C) 2018-2020 Stefan Westerfeld
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <string>
#include <algorithm>

#include "wavdata.hh"
#include "wmcommon.hh"
#include "wmspeed.hh"
#include "convcode.hh"
#include "shortcode.hh"
#include "syncfinder.hh"
#include "resample.hh"
#include "fft.hh"

using std::string;
using std::vector;
using std::min;
using std::max;
using std::complex;

static vector<float>
normalize_soft_bits (const vector<float>& soft_bits)
{
  vector<float> norm_soft_bits;

  /* soft decoding produces better error correction than hard decoding */
  if (Params::hard)
    {
      for (auto value : soft_bits)
        norm_soft_bits.push_back (value > 0 ? 1.0 : 0.0);
    }
  else
    {
      /* figure out average level of each bit */
      double mean = 0;
      for (auto value : soft_bits)
        mean += fabs (value);
      mean /= soft_bits.size();

      /* rescale from [-mean,+mean] to [0.0,1.0] */
      for (auto value : soft_bits)
        norm_soft_bits.push_back (0.5 * (value / mean + 1));
    }

  return norm_soft_bits;
}

static vector<float>
mix_decode (vector<vector<complex<float>>>& fft_out, int n_channels)
{
  vector<float> raw_bit_vec;

  const int frame_count = mark_data_frame_count();

  vector<MixEntry> mix_entries = gen_mix_entries();

  double umag = 0, dmag = 0;
  for (int f = 0; f < frame_count; f++)
    {
      for (int ch = 0; ch < n_channels; ch++)
        {
          for (size_t frame_b = 0; frame_b < Params::bands_per_frame; frame_b++)
            {
              int b = f * Params::bands_per_frame + frame_b;
              const double min_db = -96;

              const size_t index = mix_entries[b].frame * n_channels + ch;
              const int u = mix_entries[b].up;
              const int d = mix_entries[b].down;

              umag += db_from_factor (abs (fft_out[index][u]), min_db);
              dmag += db_from_factor (abs (fft_out[index][d]), min_db);
            }
        }
      if ((f % Params::frames_per_bit) == (Params::frames_per_bit - 1))
        {
          raw_bit_vec.push_back (umag - dmag);
          umag = 0;
          dmag = 0;
        }
    }
  return raw_bit_vec;
}

static vector<float>
linear_decode (vector<vector<complex<float>>>& fft_out, int n_channels)
{
  UpDownGen     up_down_gen (Random::Stream::data_up_down);
  vector<float> raw_bit_vec;

  const int frame_count = mark_data_frame_count();

  double umag = 0, dmag = 0;
  for (int f = 0; f < frame_count; f++)
    {
      for (int ch = 0; ch < n_channels; ch++)
        {
          const size_t index = data_frame_pos (f) * n_channels + ch;
          UpDownArray up, down;
          up_down_gen.get (f, up, down);

          const double min_db = -96;
          for (auto u : up)
            umag += db_from_factor (abs (fft_out[index][u]), min_db);

          for (auto d : down)
            dmag += db_from_factor (abs (fft_out[index][d]), min_db);
        }
      if ((f % Params::frames_per_bit) == (Params::frames_per_bit - 1))
        {
          raw_bit_vec.push_back (umag - dmag);
          umag = 0;
          dmag = 0;
        }
    }
  return raw_bit_vec;
}

class ResultSet
{
public:
  enum class Type { BLOCK, CLIP, ALL };
  struct Pattern
  {
    vector<int>       bit_vec;
    float             decode_error = 0;
    SyncFinder::Score sync_score;
    Type              type;
    bool              speed_pattern;
  };
private:
  vector<Pattern> patterns;
  bool            speed_pattern = false;

public:
  void
  set_speed_pattern (bool sp)
  {
    speed_pattern = sp;
  }
  void
  add_pattern (SyncFinder::Score sync_score, const vector<int>& bit_vec, float decode_error, Type pattern_type)
  {
    Pattern p;
    p.sync_score = sync_score;
    p.bit_vec = bit_vec;
    p.decode_error = decode_error;
    p.type = pattern_type;
    p.speed_pattern = speed_pattern;

    patterns.push_back (p);
  }
  void
  print()
  {
    std::stable_sort (patterns.begin(), patterns.end(), [](const Pattern& p1, const Pattern& p2) {
      const int all1 = p1.type == Type::ALL;
      const int all2 = p2.type == Type::ALL;
      if (all1 != all2)
        return all1 < all2;
      else
        return p1.sync_score.index < p2.sync_score.index;
    });
    for (const auto& pattern : patterns)
      {
        if (pattern.type == Type::ALL) /* this is the combined pattern "all" */
          {
            const char *extra = "";
            if (pattern.speed_pattern)
              extra = " SPEED";

            printf ("pattern   all %s %.3f %.3f%s\n", bit_vec_to_str (pattern.bit_vec).c_str(),
                                                      pattern.sync_score.quality, pattern.decode_error,
                                                      extra);
          }
        else
          {
            string block_str;

            switch (pattern.sync_score.block_type)
              {
                case ConvBlockType::a:  block_str = "A";
                                        break;
                case ConvBlockType::b:  block_str = "B";
                                        break;
                case ConvBlockType::ab: block_str = "AB";
                                        break;
              }
            if (pattern.type == Type::CLIP)
              block_str = "CLIP-" + block_str;
            if (pattern.speed_pattern)
              block_str += "-SPEED";

            const int seconds = pattern.sync_score.index / Params::mark_sample_rate;
            printf ("pattern %2d:%02d %s %.3f %.3f %s\n", seconds / 60, seconds % 60, bit_vec_to_str (pattern.bit_vec).c_str(),
                      pattern.sync_score.quality, pattern.decode_error, block_str.c_str());
          }
      }
  }
  int
  print_match_count (const string& orig_pattern)
  {
    int match_count = 0;

    vector<int> orig_vec = bit_str_to_vec (orig_pattern);
    for (auto p : patterns)
      {
        bool        match = true;

        for (size_t i = 0; i < p.bit_vec.size(); i++)
          match = match && (p.bit_vec[i] == orig_vec[i % orig_vec.size()]);

        if (match)
          match_count++;
      }
    printf ("match_count %d %zd\n", match_count, patterns.size());
    return match_count;
  }
  double
  best_quality() const
  {
    double q = -1;
    for (const auto& pattern : patterns)
      if (pattern.sync_score.quality > q)
        q = pattern.sync_score.quality;
    return q;
  }
};

/*
 * The block decoder is responsible for finding whole data blocks inside the
 * input file and decoding them. This only works for files that are large
 * enough to contain one data block. Incomplete blocks are ignored.
 *
 * INPUT:   AA|BBBBB|AAAAA|BBB
 * MATCH       BBBBB
 * MATCH             AAAAA
 *
 * The basic algorithm is this:
 *
 *  - use sync finder to find start index for the blocks
 *  - decode the blocks
 *  - try to combine A + B blocks for better error correction (AB)
 *  - try to combine all available blocks for better error correction (all pattern)
 */
class BlockDecoder
{
  int debug_sync_frame_count = 0;
  vector<SyncFinder::Score> sync_scores; // stored here for sync debugging
public:
  void
  run (const WavData& wav_data, ResultSet& result_set)
  {
    int total_count = 0;

    SyncFinder sync_finder;
    sync_scores = sync_finder.search (wav_data, SyncFinder::Mode::BLOCK);

    vector<float> raw_bit_vec_all (code_size (ConvBlockType::ab, Params::payload_size));
    vector<int>   raw_bit_vec_norm (2);

    SyncFinder::Score score_all { 0, 0 };
    SyncFinder::Score score_ab  { 0, 0, ConvBlockType::ab };

    ConvBlockType last_block_type = ConvBlockType::b;
    vector<vector<float>> ab_raw_bit_vec (2);
    vector<float>         ab_quality (2);
    FFTAnalyzer           fft_analyzer (wav_data.n_channels());
    for (auto sync_score : sync_scores)
      {
        const size_t count = mark_sync_frame_count() + mark_data_frame_count();
        const size_t index = sync_score.index;
        const int    ab = (sync_score.block_type == ConvBlockType::b); /* A -> 0, B -> 1 */

        auto fft_range_out = fft_analyzer.fft_range (wav_data.samples(), index, count);
        if (fft_range_out.size())
          {
            /* ---- retrieve bits from watermark ---- */
            vector<float> raw_bit_vec;
            if (Params::mix)
              {
                raw_bit_vec = mix_decode (fft_range_out, wav_data.n_channels());
              }
            else
              {
                raw_bit_vec = linear_decode (fft_range_out, wav_data.n_channels());
              }
            assert (raw_bit_vec.size() == code_size (ConvBlockType::a, Params::payload_size));

            raw_bit_vec = randomize_bit_order (raw_bit_vec, /* encode */ false);

            /* ---- deal with this pattern ---- */
            float decode_error = 0;
            vector<int> bit_vec = code_decode_soft (sync_score.block_type, normalize_soft_bits (raw_bit_vec), &decode_error);

            if (!bit_vec.empty())
              result_set.add_pattern (sync_score, bit_vec, decode_error, ResultSet::Type::BLOCK);
            total_count += 1;

            /* ---- update "all" pattern ---- */
            score_all.quality += sync_score.quality;

            for (size_t i = 0; i < raw_bit_vec.size(); i++)
              {
                raw_bit_vec_all[i * 2 + ab] += raw_bit_vec[i];
              }
            raw_bit_vec_norm[ab]++;

            /* ---- if last block was A & this block is B => deal with combined AB block */
            ab_raw_bit_vec[ab] = raw_bit_vec;
            ab_quality[ab]     = sync_score.quality;
            if (last_block_type == ConvBlockType::a && sync_score.block_type == ConvBlockType::b)
              {
                /* join A and B block -> AB block */
                vector<float> ab_bits (raw_bit_vec.size() * 2);
                for (size_t i = 0; i <  raw_bit_vec.size(); i++)
                  {
                    ab_bits[i * 2] = ab_raw_bit_vec[0][i];
                    ab_bits[i * 2 + 1] = ab_raw_bit_vec[1][i];
                  }
                vector<int> bit_vec = code_decode_soft (ConvBlockType::ab, normalize_soft_bits (ab_bits), &decode_error);
                if (!bit_vec.empty())
                  {
                    score_ab.index = sync_score.index;
                    score_ab.quality = (ab_quality[0] + ab_quality[1]) / 2;
                    result_set.add_pattern (score_ab, bit_vec, decode_error, ResultSet::Type::BLOCK);
                  }
              }
            last_block_type = sync_score.block_type;
          }
      }
    if (total_count > 1) /* all pattern: average soft bits of all watermarks and decode */
      {
        for (size_t i = 0; i < raw_bit_vec_all.size(); i += 2)
          {
            raw_bit_vec_all[i]     /= max (raw_bit_vec_norm[0], 1); /* normalize A soft bits with number of A blocks */
            raw_bit_vec_all[i + 1] /= max (raw_bit_vec_norm[1], 1); /* normalize B soft bits with number of B blocks */
          }
        score_all.quality /= raw_bit_vec_norm[0] + raw_bit_vec_norm[1];

        vector<float> soft_bit_vec = normalize_soft_bits (raw_bit_vec_all);

        float decode_error = 0;
        vector<int> bit_vec = code_decode_soft (ConvBlockType::ab, soft_bit_vec, &decode_error);

        if (!bit_vec.empty())
          result_set.add_pattern (score_all, bit_vec, decode_error, ResultSet::Type::ALL);
      }

    debug_sync_frame_count = frame_count (wav_data);
  }
  void
  print_debug_sync()
  {
      /* search sync markers at typical positions */
      const int expect0 = Params::frames_pad_start * Params::frame_size;
      const int expect_step = (mark_sync_frame_count() + mark_data_frame_count()) * Params::frame_size;
      const int expect_end = debug_sync_frame_count * Params::frame_size;

      int sync_match = 0;
      for (int expect_index = expect0; expect_index + expect_step < expect_end; expect_index += expect_step)
        {
          for (auto sync_score : sync_scores)
            {
              if (abs (int (sync_score.index + Params::test_cut) - expect_index) < Params::frame_size / 2)
                {
                  sync_match++;
                  break;
                }
            }
        }
      printf ("sync_match %d %zd\n", sync_match, sync_scores.size());
  }
};

/*
 * The clip decoder is responsible for decoding short clips. It is designed to
 * handle input sizes that are smaller than one data block. One case is that
 * the clip contains a partial A block (so the data could start after the start
 * of the A block and end before the end of the A block).
 *
 * ORIG:   |AAAAA|BBBBB|AAAAA|BBBBB|
 * CLIP:    |AAA|
 *
 * A clip could also contain the end of one block and the start of the next block,
 * like this:
 *
 * ORIG:   |AAAAA|BBBBB|AAAAA|BBBBB|
 * CLIP:                   |A|BB|
 *
 * The basic algorithm is this:
 *
 *  - zeropad   |AAA|  => 00000|AAA|00000
 *  - use sync finder to find start index of one long block in the zeropadded data
 *  - decode the bits
 *
 * For files larger than one data block, we decode twice, at the beginning and end
 *
 * INPUT   AAA|BBBBB|A
 * CLIP #1 AAA|BB
 * CLIP #2      BBBB|A
 */
class ClipDecoder
{
  const int frames_per_block = 0;

  vector<float>
  mix_or_linear_decode (vector<vector<complex<float>>>& fft_out, int n_channels)
  {
    if (Params::mix)
      return mix_decode (fft_out, n_channels);
    else
      return linear_decode (fft_out, n_channels);
  }
  void
  run_padded (const WavData& wav_data, ResultSet& result_set, double time_offset_sec)
  {
    SyncFinder                sync_finder;
    vector<SyncFinder::Score> sync_scores = sync_finder.search (wav_data, SyncFinder::Mode::CLIP);
    FFTAnalyzer               fft_analyzer (wav_data.n_channels());

    for (auto sync_score : sync_scores)
      {
        const size_t count = mark_sync_frame_count() + mark_data_frame_count();
        const size_t index = sync_score.index;
        auto fft_range_out1 = fft_analyzer.fft_range (wav_data.samples(), index, count);
        auto fft_range_out2 = fft_analyzer.fft_range (wav_data.samples(), index + count * Params::frame_size, count);
        if (fft_range_out1.size() && fft_range_out2.size())
          {
            const auto raw_bit_vec1 = randomize_bit_order (mix_or_linear_decode (fft_range_out1, wav_data.n_channels()), /* encode */ false);
            const auto raw_bit_vec2 = randomize_bit_order (mix_or_linear_decode (fft_range_out2, wav_data.n_channels()), /* encode */ false);
            const size_t bits_per_block = raw_bit_vec1.size();
            vector<float> raw_bit_vec;
            for (size_t i = 0; i < bits_per_block; i++)
              {
                if (sync_score.block_type == ConvBlockType::a)
                  {
                    raw_bit_vec.push_back (raw_bit_vec1[i]);
                    raw_bit_vec.push_back (raw_bit_vec2[i]);
                  }
                else
                  {
                    raw_bit_vec.push_back (raw_bit_vec2[i]);
                    raw_bit_vec.push_back (raw_bit_vec1[i]);
                  }
              }

            float decode_error = 0;
            vector<int> bit_vec = code_decode_soft (ConvBlockType::ab, normalize_soft_bits (raw_bit_vec), &decode_error);
            if (!bit_vec.empty())
              {
                SyncFinder::Score sync_score_nopad = sync_score;
                sync_score_nopad.index = time_offset_sec * wav_data.sample_rate();
                result_set.add_pattern (sync_score_nopad, bit_vec, decode_error, ResultSet::Type::CLIP);
              }
          }
      }
  }
  enum class Pos { START, END };
  void
  run_block (const WavData& wav_data, ResultSet& result_set, Pos pos)
  {
    const size_t n = (frames_per_block + 5) * Params::frame_size * wav_data.n_channels();

    // range of samples used by clip: [first_sample, last_sample)
    size_t first_sample;
    size_t last_sample;
    size_t pad_samples_start = n;
    size_t pad_samples_end   = n;

    if (pos == Pos::START)
      {
        first_sample = 0;
        last_sample  = min (n, wav_data.n_values());

        // increase padding at start for small blocks
        //   -> (available samples + padding) must always be one L-block
        if (last_sample < n)
          pad_samples_start += n - last_sample;
      }
    else // (pos == Pos::END)
      {
        if (wav_data.n_values() <= n)
          return;

        first_sample = wav_data.n_values() - n;
        last_sample  = wav_data.n_values();
      }
    const double time_offset = double (first_sample) / wav_data.sample_rate() / wav_data.n_channels();
    vector<float> ext_samples (wav_data.samples().begin() + first_sample, wav_data.samples().begin() + last_sample);

    if (0)
      {
        printf ("%d: %f..%f\n", int (pos), time_offset, time_offset + double (ext_samples.size()) / wav_data.sample_rate() / wav_data.n_channels());
        printf ("%f< >%f\n",
          double (pad_samples_start) / wav_data.sample_rate() / wav_data.n_channels(),
          double (pad_samples_end) / wav_data.sample_rate() / wav_data.n_channels());
      }
    ext_samples.insert (ext_samples.begin(), pad_samples_start, 0);
    ext_samples.insert (ext_samples.end(),   pad_samples_end, 0);

    WavData l_wav_data (ext_samples, wav_data.n_channels(), wav_data.sample_rate(), wav_data.bit_depth());
    run_padded (l_wav_data, result_set, time_offset);
   }
public:
  ClipDecoder() :
    frames_per_block (mark_sync_frame_count() + mark_data_frame_count())
  {
  }
  void
  run (const WavData& wav_data, ResultSet& result_set)
  {
    const int wav_frames = wav_data.n_values() / (Params::frame_size * wav_data.n_channels());
    if (wav_frames < frames_per_block * 3.1) /* clip decoder is only used for small wavs */
      {
        run_block (wav_data, result_set, Pos::START);
        run_block (wav_data, result_set, Pos::END);
      }
  }
};

static int
decode_and_report (const WavData& wav_data, const string& orig_pattern)
{
  ResultSet result_set;

  /*
   * The strategy for integrating speed detection into decoding is this:
   *  - we always (unconditionally) try to decode the  watermark on the original wav data
   *  - if detected speed is somewhat different than 1.0, we also try to decode stretched data
   *  - we report all normal and speed results we get
   *
   * The reason to do it this way is that the detected speed may be wrong (on short clips)
   * and we don't want to loose a successful clip decoder match in this case.
   */
  if (Params::detect_speed)
    {
      double speed = detect_speed (wav_data, !orig_pattern.empty());

      // speeds closer to 1.0 than this usually work without stretching before decode
      if (speed < 0.9999 || speed > 1.0001)
        {
          printf ("speed %.6f\n", speed);
          WavData wav_data_speed = resample (wav_data, Params::mark_sample_rate * speed);

          result_set.set_speed_pattern (true);
          BlockDecoder block_decoder;
          block_decoder.run (wav_data_speed, result_set);

          ClipDecoder clip_decoder;
          clip_decoder.run (wav_data_speed, result_set);
          result_set.set_speed_pattern (false);
        }
    }

  BlockDecoder block_decoder;
  block_decoder.run (wav_data, result_set);

  ClipDecoder clip_decoder;
  clip_decoder.run (wav_data, result_set);
  result_set.print();

  if (!orig_pattern.empty())
    {
      int match_count = result_set.print_match_count (orig_pattern);

      block_decoder.print_debug_sync();

      if (!match_count)
        return 1;
    }
  return 0;
}

int
get_watermark (const string& infile, const string& orig_pattern)
{
  WavData wav_data;
  Error err = wav_data.load (infile);
  if (err)
    {
      error ("audiowmark: error loading %s: %s\n", infile.c_str(), err.message());
      return 1;
    }

  if (Params::test_truncate)
    {
      const size_t  want_n_samples = wav_data.sample_rate() * wav_data.n_channels() * Params::test_truncate;
      vector<float> short_samples  = wav_data.samples();

      if (want_n_samples < short_samples.size())
        {
          short_samples.resize (want_n_samples);
          wav_data.set_samples (short_samples);
        }
    }
  if (wav_data.sample_rate() == Params::mark_sample_rate)
    {
      return decode_and_report (wav_data, orig_pattern);
    }
  else
    {
      return decode_and_report (resample (wav_data, Params::mark_sample_rate), orig_pattern);
    }
}