/* Copyright 2016--2020 The Tor Project
 * See LICENSE for licensing information */

package org.torproject.metrics.stats.hidserv;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

/** Extrapolates hidden-service statistics reported by single relays by
 * dividing them by the computed fraction of hidden-service activity
 * observed by the relay.
 *
 * <p>Two sets of statistics are handled with the same algorithm and
 * differ only in the files and document stores they use and in the
 * divisor applied to the reported number of unique onion addresses.</p> */
public class Extrapolator {

  private static final Logger logger
      = LoggerFactory.getLogger(Extrapolator.class);

  /** Version label selecting the extrapolation rules for version 2 onion
   * services. */
  private static final String VERSION_V2 = "v2";

  /** Version label selecting the extrapolation rules for version 3 onion
   * services. */
  private static final String VERSION_V3 = "v3";

  /** Number of directories that a version 2 descriptor is (likely)
   * stored to. */
  private static final double V2_DESCRIPTOR_UPLOADS = 12.0;

  /** Number of descriptor uploads per version 3 onion service that
   * directories are exposed to within one statistics interval. */
  private static final double V3_DESCRIPTOR_UPLOADS = 24.0;

  /** Document file containing previously parsed reported hidden-service
   * statistics. */
  private final File reportedHidServStatsFile;

  /** Document store for storing and retrieving reported hidden-service
   * statistics. */
  private final DocumentStore<ReportedHidServStats>
      reportedHidServStatsStore;

  /** Directory containing document files with previously computed network
   * fractions. */
  private final File computedNetworkFractionsDirectory;

  /** Document store for storing and retrieving computed network
   * fractions. */
  private final DocumentStore<ComputedNetworkFractions>
      computedNetworkFractionsStore;

  /** Document file containing extrapolated hidden-service statistics. */
  private final File extrapolatedHidServStatsFile;

  /** Document store for storing and retrieving extrapolated hidden-service
   * statistics. */
  private final DocumentStore<ExtrapolatedHidServStats>
      extrapolatedHidServStatsStore;

  /** Document file containing previously parsed reported version 3
   * hidden-service statistics. */
  private final File reportedV3HidServStatsFile;

  /** Document store for storing and retrieving reported version 3
   * hidden-service statistics. */
  private final DocumentStore<ReportedHidServStats>
      reportedV3HidServStatsStore;

  /** Document file containing extrapolated version 3 hidden-service
   * statistics. */
  private final File extrapolatedV3HidServStatsFile;

  /** Document store for storing and retrieving extrapolated version 3
   * hidden-service statistics. */
  private final DocumentStore<ExtrapolatedHidServStats>
      extrapolatedV3HidServStatsStore;

  /** Initializes a new extrapolator object using the given directory and
   * document stores.
   *
   * @param statusDirectory Directory in which all document files and the
   *     computed-network-fractions directory are located.
   * @param reportedHidServStatsStore Store for reported statistics.
   * @param reportedV3HidServStatsStore Store for reported version 3
   *     statistics.
   * @param computedNetworkFractionsStore Store for computed network
   *     fractions.
   * @param extrapolatedHidServStatsStore Store for extrapolated
   *     statistics.
   * @param extrapolatedV3HidServStatsStore Store for extrapolated version
   *     3 statistics. */
  public Extrapolator(File statusDirectory,
      DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
      DocumentStore<ReportedHidServStats> reportedV3HidServStatsStore,
      DocumentStore<ComputedNetworkFractions>
      computedNetworkFractionsStore,
      DocumentStore<ExtrapolatedHidServStats>
      extrapolatedHidServStatsStore,
      DocumentStore<ExtrapolatedHidServStats>
      extrapolatedV3HidServStatsStore) {

    /* Create File instances for the files and directories in the provided
     * status directory. */
    this.reportedHidServStatsFile = new File(statusDirectory,
        "reported-hidserv-stats");
    this.computedNetworkFractionsDirectory =
        new File(statusDirectory, "computed-network-fractions");
    this.extrapolatedHidServStatsFile = new File(statusDirectory,
        "extrapolated-hidserv-stats");
    this.reportedV3HidServStatsFile = new File(statusDirectory,
        "reported-v3hidserv-stats");
    this.extrapolatedV3HidServStatsFile = new File(statusDirectory,
        "extrapolated-v3hidserv-stats");

    /* Store references to the provided document stores. */
    this.reportedHidServStatsStore = reportedHidServStatsStore;
    this.computedNetworkFractionsStore = computedNetworkFractionsStore;
    this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
    this.reportedV3HidServStatsStore = reportedV3HidServStatsStore;
    this.extrapolatedV3HidServStatsStore = extrapolatedV3HidServStatsStore;
  }

  /** Iterates over all reported stats and extrapolates network totals for
   * those that have not been extrapolated before.
   *
   * @return The result of storing the extrapolated stats, or
   *     <code>false</code> if previously stored documents could not be
   *     read. */
  public boolean extrapolateHidServStats() {
    return this.extrapolateAndStore(this.reportedHidServStatsStore,
        this.reportedHidServStatsFile, this.extrapolatedHidServStatsStore,
        this.extrapolatedHidServStatsFile, VERSION_V2);
  }

  /** Iterates over all reported version 3 stats and extrapolates network
   * totals for those that have not been extrapolated before.
   *
   * @return The result of storing the extrapolated stats, or
   *     <code>false</code> if previously stored documents could not be
   *     read. */
  public boolean extrapolateV3HidServStats() {
    return this.extrapolateAndStore(this.reportedV3HidServStatsStore,
        this.reportedV3HidServStatsFile,
        this.extrapolatedV3HidServStatsStore,
        this.extrapolatedV3HidServStatsFile, VERSION_V3);
  }

  /** Retrieves reported and previously extrapolated stats, extrapolates
   * what is new, and writes the result back using the given store. */
  private boolean extrapolateAndStore(
      DocumentStore<ReportedHidServStats> reportedStore,
      File reportedFile,
      DocumentStore<ExtrapolatedHidServStats> extrapolatedStore,
      File extrapolatedFile, String version) {

    /* Retrieve previously extrapolated stats to avoid extrapolating them
     * again. */
    Set<ExtrapolatedHidServStats> extrapolatedStats =
        extrapolatedStore.retrieve(extrapolatedFile);

    /* Retrieve all reported stats, even including those that have already
     * been extrapolated. */
    Set<ReportedHidServStats> reportedStats =
        reportedStore.retrieve(reportedFile);

    Set<ExtrapolatedHidServStats> computedStats =
        this.computeExtrapolatedStats(extrapolatedStats, reportedStats,
        version);

    /* Nothing was computed because input documents could not be read (a
     * warning has been logged already).  Report failure rather than
     * handing a null set to the document store. */
    if (computedStats == null) {
      return false;
    }

    /* Store all extrapolated network totals to disk with help of the
     * document store. */
    return extrapolatedStore.store(extrapolatedFile, computedStats);
  }

  /** Extrapolates network totals for all reported stats that are not yet
   * contained in the given extrapolated stats.
   *
   * @param extrapolatedStats Previously extrapolated stats; newly
   *     extrapolated stats are added to this very set.
   * @param reportedStats All reported stats.
   * @param version Either "v2" or "v3", selecting the divisor used for
   *     extrapolating the number of unique onion addresses; for any other
   *     value that extrapolated number is set to 0.
   * @return The given set of extrapolated stats including all additions,
   *     or <code>null</code> if one of the two given sets is
   *     <code>null</code>. */
  private Set<ExtrapolatedHidServStats> computeExtrapolatedStats(
      Set<ExtrapolatedHidServStats> extrapolatedStats,
      Set<ReportedHidServStats> reportedStats, String version) {
    /* Make sure that all documents could be retrieved correctly. */
    if (extrapolatedStats == null || reportedStats == null) {
      logger.warn("Could not read previously parsed or extrapolated "
          + "stats files. Skipping.");
      return null;
    }

    /* Re-arrange reported stats by fingerprint. */
    SortedMap<String, Set<ReportedHidServStats>> parsedStatsByFingerprint =
        new TreeMap<>();
    for (ReportedHidServStats stat : reportedStats) {
      parsedStatsByFingerprint.computeIfAbsent(stat.getFingerprint(),
          key -> new HashSet<>()).add(stat);
    }

    /* Go through reported stats by fingerprint. */
    for (Map.Entry<String, Set<ReportedHidServStats>> e
        : parsedStatsByFingerprint.entrySet()) {
      String fingerprint = e.getKey();

      /* Iterate over all stats reported by this relay and make a list of
       * those that still need to be extrapolated.  Also make a list of
       * all dates for which we need to retrieve computed network
       * fractions. */
      Set<ReportedHidServStats> newReportedStats = new HashSet<>();
      SortedSet<String> retrieveFractionDates = new TreeSet<>();
      for (ReportedHidServStats stats : e.getValue()) {

        /* Check whether extrapolated stats already contain an object with
         * the same statistics interval end date and fingerprint. */
        long statsEndMillis = stats.getStatsEndMillis();
        long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
            * DateTimeHelper.ONE_DAY;
        if (extrapolatedStats.contains(
            new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) {
          continue;
        }

        /* Add the reported stats to the list of stats we still need to
         * extrapolate. */
        newReportedStats.add(stats);

        /* Add all dates between statistics interval start and end to a
         * list. */
        long statsStartMillis = statsEndMillis
            - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
        for (long millis = statsStartMillis; millis <= statsEndMillis;
            millis += DateTimeHelper.ONE_DAY) {
          retrieveFractionDates.add(DateTimeHelper.format(millis,
              DateTimeHelper.ISO_DATE_FORMAT));
        }

        /* Also add the date of the interval end itself.  Stepping in
         * whole days from the interval start can stop short of that date
         * if the interval length is not a multiple of one day, in which
         * case consensuses of the last date would never be looked at. */
        retrieveFractionDates.add(DateTimeHelper.format(statsEndMillis,
            DateTimeHelper.ISO_DATE_FORMAT));
      }

      /* Retrieve all computed network fractions that might be needed to
       * extrapolate new statistics.  Keep a list of all known consensus
       * valid-after times, and keep a map of fractions also by consensus
       * valid-after time.  (It's not sufficient to only keep the latter,
       * because we need to count known consensuses even if the relay was
       * not contained in a consensus or had a network fraction of exactly
       * zero.) */
      SortedSet<Long> knownConsensuses = new TreeSet<>();
      SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions =
          new TreeMap<>();
      boolean retrievedAllFractions = true;
      for (String date : retrieveFractionDates) {
        File documentFile = new File(
            this.computedNetworkFractionsDirectory, date);
        Set<ComputedNetworkFractions> fractions
            = this.computedNetworkFractionsStore.retrieve(documentFile,
            fingerprint);
        if (fractions == null) {
          /* NOTE(review): assumes this retrieve variant signals a read
           * failure with null, like the one-argument variant whose result
           * is checked above -- confirm against DocumentStore.  Leave the
           * stats of this relay untouched, so that they are picked up
           * again in a later run rather than being stored as empty. */
          logger.warn("Could not read computed network fractions from {}. "
              + "Skipping relay {} for now.", documentFile, fingerprint);
          retrievedAllFractions = false;
          break;
        }
        for (ComputedNetworkFractions fraction : fractions) {
          knownConsensuses.add(fraction.getValidAfterMillis());
          if (fraction.getFingerprint().equals(fingerprint)) {
            computedNetworkFractions.put(fraction.getValidAfterMillis(),
                fraction);
          }
        }
      }
      if (!retrievedAllFractions) {
        continue;
      }

      /* Go through newly reported stats, match them with computed network
       * fractions, and extrapolate network totals. */
      for (ReportedHidServStats stats : newReportedStats) {
        long statsEndMillis = stats.getStatsEndMillis();
        long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
            * DateTimeHelper.ONE_DAY;
        long statsStartMillis = statsEndMillis
            - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;

        /* Sum up computed network fractions and count known consensus in
         * the relevant interval, so that we can later compute means of
         * network fractions. */
        double sumFractionRendRelayedCells = 0.0;
        double sumFractionDirOnionsSeen = 0.0;
        int consensuses = 0;
        for (long validAfterMillis : knownConsensuses) {
          if (statsStartMillis <= validAfterMillis
              && validAfterMillis < statsEndMillis) {
            ComputedNetworkFractions frac =
                computedNetworkFractions.get(validAfterMillis);
            if (frac != null) {
              sumFractionRendRelayedCells +=
                  frac.getFractionRendRelayedCells();
              sumFractionDirOnionsSeen +=
                  frac.getFractionDirOnionsSeen();
            }
            consensuses++;
          }
        }

        /* Compute means of network fractions, or assume 0.0 if we don't
         * know a single consensus with valid-after time in the statistics
         * interval. */
        double fractionRendRelayedCells = consensuses == 0 ? 0.0
            : sumFractionRendRelayedCells / consensuses;
        double fractionDirOnionsSeen = consensuses == 0 ? 0.0
            : sumFractionDirOnionsSeen / consensuses;

        /* Extrapolate network totals. If we don't know a single
         * consensus, store an empty statistic anyway to avoid processing
         * these reported statistics over and over. */
        ExtrapolatedHidServStats extrapolated =
            new ExtrapolatedHidServStats(
            statsDateMillis, fingerprint);
        if (fractionRendRelayedCells > 0.0) {
          extrapolated.setFractionRendRelayedCells(
              fractionRendRelayedCells);
          /* Extrapolating cells on rendezvous circuits is as easy as
           * dividing the reported number by the computed network
           * fraction. */
          double extrapolatedRendRelayedCells =
              stats.getRendRelayedCells() / fractionRendRelayedCells;
          extrapolated.setExtrapolatedRendRelayedCells(
              extrapolatedRendRelayedCells);
        }
        if (fractionDirOnionsSeen > 0.0) {
          extrapolated.setFractionDirOnionsSeen(
              fractionDirOnionsSeen);
          double extrapolatedDirOnionsSeen = 0;
          /* Compare version labels by content, not by reference. */
          if (VERSION_V2.equals(version)) {
            /* Extrapolating reported unique .onion addresses to the
             * total number in the network is more difficult.  In short,
             * each descriptor is stored to 12 (likely) different
             * directories, so we'll have to divide the reported number by
             * 12 and then by the computed network fraction of this
             * directory. */
            extrapolatedDirOnionsSeen = stats.getDirOnionsSeen()
                / (V2_DESCRIPTOR_UPLOADS * fractionDirOnionsSeen);
          } else if (VERSION_V3.equals(version)) {
            /* When it comes to the number of onion services, the logic is
             * identical between v2 and v3 stats, but we divide by 24
             * instead of 12.  That's because in v3 onions, each
             * descriptor has 2 replicas and 4 uploads for each replica,
             * so in total each descriptor gets uploaded to 8 HSDirs.
             * Furthermore, each onion service maintains two descriptors
             * at any given time (so 16 uploads), and rotates one
             * descriptor at around 00:00 UTC (so another 8 uploads).
             * This means that in the interval from 12:00 UTC to 12:00 UTC
             * the HSDirs will be exposed to 24 uploads for each onion
             * service.  This is precisely the reason we choose to collect
             * stats from 12:00 UTC to 12:00 UTC.  This logic has not been
             * documented elsewhere yet, but we should find a good place
             * to document it. */
            extrapolatedDirOnionsSeen = stats.getDirOnionsSeen()
                / (V3_DESCRIPTOR_UPLOADS * fractionDirOnionsSeen);
          }
          extrapolated.setExtrapolatedDirOnionsSeen(
              extrapolatedDirOnionsSeen);
        }
        extrapolatedStats.add(extrapolated);
      }
    }
    return extrapolatedStats;
  }
}
