1 /*
   2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package java.util.stream;
  26 
  27 import java.util.Collections;
  28 import java.util.EnumSet;
  29 import java.util.Objects;
  30 import java.util.Set;
  31 import java.util.function.*;
  32 
  33 /**
  34  * A <a href="package-summary.html#Reduction">mutable reduction operation</a> that
  35  * accumulates input elements into a mutable result container, optionally transforming
  36  * the accumulated result into a final representation after all input elements
  37  * have been processed.  Reduction operations can be performed either sequentially
  38  * or in parallel.
  39  *
  40  * <p>Examples of mutable reduction operations include:
  41  * accumulating elements into a {@code Collection}; concatenating
  42  * strings using a {@code StringBuilder}; computing summary information about
  43  * elements such as sum, min, max, or average; computing "pivot table" summaries
  44  * such as "maximum valued transaction by seller", etc.  The class {@link Collectors}
  45  * provides implementations of many common mutable reductions.
  46  *
  47  * <p>A {@code Collector} is specified by four functions that work together to
  48  * accumulate entries into a mutable result container, and optionally perform
  49  * a final transform on the result.  They are: <ul>
  50  *     <li>creation of a new result container ({@link #supplier()})</li>
  51  *     <li>incorporating a new data element into a result container ({@link #accumulator()})</li>
  52  *     <li>combining two result containers into one ({@link #combiner()})</li>
  53  *     <li>performing an optional final transform on the container ({@link #finisher()})</li>
  54  * </ul>
  55  *
  56  * <p>Collectors also have a set of characteristics, such as
  57  * {@link Characteristics#CONCURRENT}, that provide hints that can be used by a
  58  * reduction implementation to provide better performance.
  59  *
  60  * <p>A sequential implementation of a reduction using a collector would
  61  * create a single result container using the supplier function, and invoke the
  62  * accumulator function once for each input element.  A parallel implementation
  63  * would partition the input, create a result container for each partition,
  64  * accumulate the contents of each partition into a subresult for that partition,
  65  * and then use the combiner function to merge the subresults into a combined
  66  * result.
  67  *
  68  * <p>To ensure that sequential and parallel executions produce equivalent
  69  * results, the collector functions must satisfy an <em>identity</em> and an
  70  * <a href="package-summary.html#Associativity">associativity</a> constraints.
  71  *
  72  * <p>The identity constraint says that for any partially accumulated result,
  73  * combining it with an empty result container must produce an equivalent
  74  * result.  That is, for a partially accumulated result {@code a} that is the
  75  * result of any series of accumulator and combiner invocations, {@code a} must
  76  * be equivalent to {@code combiner.apply(a, supplier.get())}.
  77  *
  78  * <p>The associativity constraint says that splitting the computation must
  79  * produce an equivalent result.  That is, for any input elements {@code t1}
  80  * and {@code t2}, the results {@code r1} and {@code r2} in the computation
  81  * below must be equivalent:
  82  * <pre>{@code
  83  *     A a1 = supplier.get();
  84  *     accumulator.accept(a1, t1);
  85  *     accumulator.accept(a1, t2);
  86  *     R r1 = finisher.apply(a1);  // result without splitting
  87  *
  88  *     A a2 = supplier.get();
  89  *     accumulator.accept(a2, t1);
  90  *     A a3 = supplier.get();
  91  *     accumulator.accept(a3, t2);
  92  *     R r2 = finisher.apply(combiner.apply(a2, a3));  // result with splitting
  93  * } </pre>
  94  *
  95  * <p>For collectors that do not have the {@code UNORDERED} characteristic,
  96  * two accumulated results {@code a1} and {@code a2} are equivalent if
  97  * {@code finisher.apply(a1).equals(finisher.apply(a2))}.  For unordered
  98  * collectors, equivalence is relaxed to allow for non-equality related to
  99  * differences in order.  (For example, an unordered collector that accumulated
 100  * elements to a {@code List} would consider two lists equivalent if they
 101  * contained the same elements, ignoring order.)
 102  *
 103  * <p>Libraries that implement reduction based on {@code Collector}, such as
 104  * {@link Stream#collect(Collector)}, must adhere to the following constraints:
 105  * <ul>
 106  *     <li>The first argument passed to the accumulator function, both
 107  *     arguments passed to the combiner function, and the argument passed to the
 108  *     finisher function must be the result of a previous invocation of the
 109  *     result supplier, accumulator, or combiner functions.</li>
 110  *     <li>The implementation should not do anything with the result of any of
 111  *     the result supplier, accumulator, or combiner functions other than to
 112  *     pass them again to the accumulator, combiner, or finisher functions,
 113  *     or return them to the caller of the reduction operation.</li>
 114  *     <li>If a result is passed to the combiner or finisher
 115  *     function, and the same object is not returned from that function, it is
 116  *     never used again.</li>
 117  *     <li>Once a result is passed to the combiner or finisher function, it
 118  *     is never passed to the accumulator function again.</li>
 119  *     <li>For non-concurrent collectors, any result returned from the result
 120  *     supplier, accumulator, or combiner functions must be serially
 121  *     thread-confined.  This enables collection to occur in parallel without
 122  *     the {@code Collector} needing to implement any additional synchronization.
 123  *     The reduction implementation must manage that the input is properly
 124  *     partitioned, that partitions are processed in isolation, and combining
 125  *     happens only after accumulation is complete.</li>
 126  *     <li>For concurrent collectors, an implementation is free to (but not
 127  *     required to) implement reduction concurrently.  A concurrent reduction
 128  *     is one where the accumulator function is called concurrently from
 129  *     multiple threads, using the same concurrently-modifiable result container,
 130  *     rather than keeping the result isolated during accumulation.
 131  *     A concurrent reduction should only be applied if the collector has the
 132  *     {@link Characteristics#UNORDERED} characteristics or if the
 133  *     originating data is unordered.</li>
 134  * </ul>
 135  *
 136  * <p>In addition to the predefined implementations in {@link Collectors}, the
 137  * static factory methods {@link #of(Supplier, BiConsumer, BinaryOperator, Characteristics...)}
 138  * can be used to construct collectors.  For example, you could create a collector
 139  * that accumulates widgets into a {@code TreeSet} with:
 140  *
 141  * <pre>{@code
 142  *     Collector<Widget, ?, TreeSet<Widget>> intoSet =
 143  *         Collector.of(TreeSet::new, TreeSet::add,
 144  *                      (left, right) -> { left.addAll(right); return left; });
 145  * }</pre>
 146  *
 147  * (This behavior is also implemented by the predefined collector
 148  * {@link Collectors#toCollection(Supplier)}).
 149  *
 150  * @apiNote
 151  * Performing a reduction operation with a {@code Collector} should produce a
 152  * result equivalent to:
 153  * <pre>{@code
 154  *     R container = collector.supplier().get();
 155  *     for (T t : data)
 156  *         collector.accumulator().accept(container, t);
 157  *     return collector.finisher().apply(container);
 158  * }</pre>
 159  *
 160  * <p>However, the library is free to partition the input, perform the reduction
 161  * on the partitions, and then use the combiner function to combine the partial
 162  * results to achieve a parallel reduction.  (Depending on the specific reduction
 163  * operation, this may perform better or worse, depending on the relative cost
 164  * of the accumulator and combiner functions.)
 165  *
 166  * <p>Collectors are designed to be <em>composed</em>; many of the methods
 167  * in {@link Collectors} are functions that take a collector and produce
 168  * a new collector.  For example, given the following collector that computes
 169  * the sum of the salaries of a stream of employees:
 170  *
 171  * <pre>{@code
 172  *     Collector<Employee, ?, Integer> summingSalaries
 173  *         = Collectors.summingInt(Employee::getSalary))
 174  * }</pre>
 175  *
 176  * If we wanted to create a collector to tabulate the sum of salaries by
 177  * department, we could reuse the "sum of salaries" logic using
 178  * {@link Collectors#groupingBy(Function, Collector)}:
 179  *
 180  * <pre>{@code
 181  *     Collector<Employee, ?, Map<Department, Integer>> summingSalariesByDept
 182  *         = Collectors.groupingBy(Employee::getDepartment, summingSalaries);
 183  * }</pre>
 184  *
 185  * @see Stream#collect(Collector)
 186  * @see Collectors
 187  *
 188  * @param <T> the type of input elements to the reduction operation
 189  * @param <A> the mutable accumulation type of the reduction operation (often
 190  *            hidden as an implementation detail)
 191  * @param <R> the result type of the reduction operation
 192  * @since 1.8
 193  */
 194 public interface Collector<T, A, R> {
 195     /**
 196      * A function that creates and returns a new mutable result container.
 197      *
 198      * @return a function which returns a new, mutable result container
 199      */
 200     Supplier<A> supplier();
 201 
 202     /**
 203      * A function that creates and returns a new mutable result container,
 204      * when applied with an initial capacity.
 205      *
 206      * @return a function which returns a new, mutable result container
 207      */
 208     default IntFunction<A> sizedSupplier() {
 209         return ignored -> supplier().get();
 210     }
 211 
 212     /**
 213      * A function that folds a value into a mutable result container.
 214      *
 215      * @return a function which folds a value into a mutable result container
 216      */
 217     BiConsumer<A, T> accumulator();
 218 
 219     /**
 220      * A function that accepts two partial results and merges them.  The
 221      * combiner function may fold state from one argument into the other and
 222      * return that, or may return a new result container.
 223      *
 224      * @return a function which combines two partial results into a combined
 225      * result
 226      */
 227     BinaryOperator<A> combiner();
 228 
 229     /**
 230      * Perform the final transformation from the intermediate accumulation type
 231      * {@code A} to the final result type {@code R}.
 232      *
 233      * <p>If the characteristic {@code IDENTITY_FINISH} is
 234      * set, this function may be presumed to be an identity transform with an
 235      * unchecked cast from {@code A} to {@code R}.
 236      *
 237      * @return a function which transforms the intermediate result to the final
 238      * result
 239      */
 240     Function<A, R> finisher();
 241 
 242     /**
 243      * Returns a {@code Set} of {@code Collector.Characteristics} indicating
 244      * the characteristics of this Collector.  This set should be immutable.
 245      *
 246      * @return an immutable set of collector characteristics
 247      */
 248     Set<Characteristics> characteristics();
 249 
 250     /**
 251      * Returns a new {@code Collector} described by the given {@code supplier},
 252      * {@code accumulator}, and {@code combiner} functions.  The resulting
 253      * {@code Collector} has the {@code Collector.Characteristics.IDENTITY_FINISH}
 254      * characteristic.
 255      *
 256      * @param supplier The supplier function for the new collector
 257      * @param accumulator The accumulator function for the new collector
 258      * @param combiner The combiner function for the new collector
 259      * @param characteristics The collector characteristics for the new
 260      *                        collector
 261      * @param <T> The type of input elements for the new collector
 262      * @param <R> The type of intermediate accumulation result, and final result,
 263      *           for the new collector
 264      * @throws NullPointerException if any argument is null
 265      * @return the new {@code Collector}
 266      */
 267     public static<T, R> Collector<T, R, R> of(Supplier<R> supplier,
 268                                               BiConsumer<R, T> accumulator,
 269                                               BinaryOperator<R> combiner,
 270                                               Characteristics... characteristics) {
 271         return of(ignored -> supplier.get(), supplier, accumulator, combiner, characteristics);
 272     }
 273 
 274     /**
 275      * Returns a new {@code Collector} described by the given {@code supplier},
 276      * {@code accumulator}, {@code combiner}, and {@code finisher} functions.
 277      *
 278      * @param supplier The supplier function for the new collector
 279      * @param accumulator The accumulator function for the new collector
 280      * @param combiner The combiner function for the new collector
 281      * @param finisher The finisher function for the new collector
 282      * @param characteristics The collector characteristics for the new
 283      *                        collector
 284      * @param <T> The type of input elements for the new collector
 285      * @param <A> The intermediate accumulation type of the new collector
 286      * @param <R> The final result type of the new collector
 287      * @throws NullPointerException if any argument is null
 288      * @return the new {@code Collector}
 289      */
 290     public static<T, A, R> Collector<T, A, R> of(Supplier<A> supplier,
 291                                                  BiConsumer<A, T> accumulator,
 292                                                  BinaryOperator<A> combiner,
 293                                                  Function<A, R> finisher,
 294                                                  Characteristics... characteristics) {
 295         return of(ignored -> supplier.get(), supplier, accumulator, combiner, finisher, characteristics);
 296     }
 297 
 298     /**
 299      * Returns a new {@code Collector} described by the given {@code supplier},
 300      * {@code accumulator}, {@code combiner}, and {@code finisher} functions.
 301      *
 302      * @param sizedSupplier The sized supplier function for the new collector
 303      * @param supplier The supplier function for the new collector
 304      * @param accumulator The accumulator function for the new collector
 305      * @param combiner The combiner function for the new collector
 306      * @param characteristics The collector characteristics for the new
 307      *                        collector
 308      * @param <T> The type of input elements for the new collector
 309      * @param <A> The intermediate accumulation type of the new collector
 310      * @param <R> The final result type of the new collector
 311      * @throws NullPointerException if any argument is null
 312      * @return the new {@code Collector}
 313      */
 314     public static<T, A, R> Collector<T, A, R> of(IntFunction<A> sizedSupplier,
 315                                                  Supplier<A> supplier,
 316                                                  BiConsumer<A, T> accumulator,
 317                                                  BinaryOperator<A> combiner,
 318                                                  Characteristics... characteristics) {
 319         Objects.requireNonNull(sizedSupplier);
 320         Objects.requireNonNull(supplier);
 321         Objects.requireNonNull(accumulator);
 322         Objects.requireNonNull(combiner);
 323         Objects.requireNonNull(characteristics);
 324         Set<Characteristics> cs = (characteristics.length == 0)
 325                                   ? Collectors.CH_ID
 326                                   : Collections.unmodifiableSet(EnumSet.of(Collector.Characteristics.IDENTITY_FINISH,
 327                                                                            characteristics));
 328         return new Collectors.CollectorImpl<>(sizedSupplier, supplier, accumulator, combiner, cs);
 329     }
 330 
 331     /**
 332      * Returns a new {@code Collector} described by the given {@code supplier},
 333      * {@code accumulator}, {@code combiner}, and {@code finisher} functions.
 334      *
 335      * @param sizedSupplier The sized supplier function for the new collector
 336      * @param supplier The supplier function for the new collector
 337      * @param accumulator The accumulator function for the new collector
 338      * @param combiner The combiner function for the new collector
 339      * @param finisher The finisher function for the new collector
 340      * @param characteristics The collector characteristics for the new
 341      *                        collector
 342      * @param <T> The type of input elements for the new collector
 343      * @param <A> The intermediate accumulation type of the new collector
 344      * @param <R> The final result type of the new collector
 345      * @throws NullPointerException if any argument is null
 346      * @return the new {@code Collector}
 347      */
 348     public static<T, A, R> Collector<T, A, R> of(IntFunction<A> sizedSupplier,
 349                                                  Supplier<A> supplier,
 350                                                  BiConsumer<A, T> accumulator,
 351                                                  BinaryOperator<A> combiner,
 352                                                  Function<A, R> finisher,
 353                                                  Characteristics... characteristics) {
 354         Objects.requireNonNull(sizedSupplier);
 355         Objects.requireNonNull(supplier);
 356         Objects.requireNonNull(accumulator);
 357         Objects.requireNonNull(combiner);
 358         Objects.requireNonNull(finisher);
 359         Objects.requireNonNull(characteristics);
 360         Set<Characteristics> cs = Collectors.CH_NOID;
 361         if (characteristics.length > 0) {
 362             cs = EnumSet.noneOf(Characteristics.class);
 363             Collections.addAll(cs, characteristics);
 364             cs = Collections.unmodifiableSet(cs);
 365         }
 366         return new Collectors.CollectorImpl<>(sizedSupplier, supplier, accumulator, combiner, finisher, cs);
 367     }
 368 
 369     /**
 370      * Characteristics indicating properties of a {@code Collector}, which can
 371      * be used to optimize reduction implementations.
 372      */
 373     enum Characteristics {
 374         /**
 375          * Indicates that this collector is <em>concurrent</em>, meaning that
 376          * the result container can support the accumulator function being
 377          * called concurrently with the same result container from multiple
 378          * threads.
 379          *
 380          * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED},
 381          * then it should only be evaluated concurrently if applied to an
 382          * unordered data source.
 383          */
 384         CONCURRENT,
 385 
 386         /**
 387          * Indicates that the collection operation does not commit to preserving
 388          * the encounter order of input elements.  (This might be true if the
 389          * result container has no intrinsic order, such as a {@link Set}.)
 390          */
 391         UNORDERED,
 392 
 393         /**
 394          * Indicates that the finisher function is the identity function and
 395          * can be elided.  If set, it must be the case that an unchecked cast
 396          * from A to R will succeed.
 397          */
 398         IDENTITY_FINISH
 399     }
 400 }