1 /* 2 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package java.util.stream; 26 27 import java.util.Collections; 28 import java.util.EnumSet; 29 import java.util.Objects; 30 import java.util.Set; 31 import java.util.function.*; 32 33 /** 34 * A <a href="package-summary.html#Reduction">mutable reduction operation</a> that 35 * accumulates input elements into a mutable result container, optionally transforming 36 * the accumulated result into a final representation after all input elements 37 * have been processed. Reduction operations can be performed either sequentially 38 * or in parallel. 39 * 40 * <p>Examples of mutable reduction operations include: 41 * accumulating elements into a {@code Collection}; concatenating 42 * strings using a {@code StringBuilder}; computing summary information about 43 * elements such as sum, min, max, or average; computing "pivot table" summaries 44 * such as "maximum valued transaction by seller", etc. The class {@link Collectors} 45 * provides implementations of many common mutable reductions. 46 * 47 * <p>A {@code Collector} is specified by four functions that work together to 48 * accumulate entries into a mutable result container, and optionally perform 49 * a final transform on the result. They are: <ul> 50 * <li>creation of a new result container ({@link #supplier()})</li> 51 * <li>incorporating a new data element into a result container ({@link #accumulator()})</li> 52 * <li>combining two result containers into one ({@link #combiner()})</li> 53 * <li>performing an optional final transform on the container ({@link #finisher()})</li> 54 * </ul> 55 * 56 * <p>Collectors also have a set of characteristics, such as 57 * {@link Characteristics#CONCURRENT}, that provide hints that can be used by a 58 * reduction implementation to provide better performance. 59 * 60 * <p>A sequential implementation of a reduction using a collector would 61 * create a single result container using the supplier function, and invoke the 62 * accumulator function once for each input element. A parallel implementation 63 * would partition the input, create a result container for each partition, 64 * accumulate the contents of each partition into a subresult for that partition, 65 * and then use the combiner function to merge the subresults into a combined 66 * result. 67 * 68 * <p>To ensure that sequential and parallel executions produce equivalent 69 * results, the collector functions must satisfy an <em>identity</em> and an 70 * <a href="package-summary.html#Associativity">associativity</a> constraints. 71 * 72 * <p>The identity constraint says that for any partially accumulated result, 73 * combining it with an empty result container must produce an equivalent 74 * result. That is, for a partially accumulated result {@code a} that is the 75 * result of any series of accumulator and combiner invocations, {@code a} must 76 * be equivalent to {@code combiner.apply(a, supplier.get())}. 77 * 78 * <p>The associativity constraint says that splitting the computation must 79 * produce an equivalent result. That is, for any input elements {@code t1} 80 * and {@code t2}, the results {@code r1} and {@code r2} in the computation 81 * below must be equivalent: 82 * <pre>{@code 83 * A a1 = supplier.get(); 84 * accumulator.accept(a1, t1); 85 * accumulator.accept(a1, t2); 86 * R r1 = finisher.apply(a1); // result without splitting 87 * 88 * A a2 = supplier.get(); 89 * accumulator.accept(a2, t1); 90 * A a3 = supplier.get(); 91 * accumulator.accept(a3, t2); 92 * R r2 = finisher.apply(combiner.apply(a2, a3)); // result with splitting 93 * } </pre> 94 * 95 * <p>For collectors that do not have the {@code UNORDERED} characteristic, 96 * two accumulated results {@code a1} and {@code a2} are equivalent if 97 * {@code finisher.apply(a1).equals(finisher.apply(a2))}. For unordered 98 * collectors, equivalence is relaxed to allow for non-equality related to 99 * differences in order. (For example, an unordered collector that accumulated 100 * elements to a {@code List} would consider two lists equivalent if they 101 * contained the same elements, ignoring order.) 102 * 103 * <p>Libraries that implement reduction based on {@code Collector}, such as 104 * {@link Stream#collect(Collector)}, must adhere to the following constraints: 105 * <ul> 106 * <li>The first argument passed to the accumulator function, both 107 * arguments passed to the combiner function, and the argument passed to the 108 * finisher function must be the result of a previous invocation of the 109 * result supplier, accumulator, or combiner functions.</li> 110 * <li>The implementation should not do anything with the result of any of 111 * the result supplier, accumulator, or combiner functions other than to 112 * pass them again to the accumulator, combiner, or finisher functions, 113 * or return them to the caller of the reduction operation.</li> 114 * <li>If a result is passed to the combiner or finisher 115 * function, and the same object is not returned from that function, it is 116 * never used again.</li> 117 * <li>Once a result is passed to the combiner or finisher function, it 118 * is never passed to the accumulator function again.</li> 119 * <li>For non-concurrent collectors, any result returned from the result 120 * supplier, accumulator, or combiner functions must be serially 121 * thread-confined. This enables collection to occur in parallel without 122 * the {@code Collector} needing to implement any additional synchronization. 123 * The reduction implementation must manage that the input is properly 124 * partitioned, that partitions are processed in isolation, and combining 125 * happens only after accumulation is complete.</li> 126 * <li>For concurrent collectors, an implementation is free to (but not 127 * required to) implement reduction concurrently. A concurrent reduction 128 * is one where the accumulator function is called concurrently from 129 * multiple threads, using the same concurrently-modifiable result container, 130 * rather than keeping the result isolated during accumulation. 131 * A concurrent reduction should only be applied if the collector has the 132 * {@link Characteristics#UNORDERED} characteristics or if the 133 * originating data is unordered.</li> 134 * </ul> 135 * 136 * <p>In addition to the predefined implementations in {@link Collectors}, the 137 * static factory methods {@link #of(Supplier, BiConsumer, BinaryOperator, Characteristics...)} 138 * can be used to construct collectors. For example, you could create a collector 139 * that accumulates widgets into a {@code TreeSet} with: 140 * 141 * <pre>{@code 142 * Collector<Widget, ?, TreeSet<Widget>> intoSet = 143 * Collector.of(TreeSet::new, TreeSet::add, 144 * (left, right) -> { left.addAll(right); return left; }); 145 * }</pre> 146 * 147 * (This behavior is also implemented by the predefined collector 148 * {@link Collectors#toCollection(Supplier)}). 149 * 150 * @apiNote 151 * Performing a reduction operation with a {@code Collector} should produce a 152 * result equivalent to: 153 * <pre>{@code 154 * R container = collector.supplier().get(); 155 * for (T t : data) 156 * collector.accumulator().accept(container, t); 157 * return collector.finisher().apply(container); 158 * }</pre> 159 * 160 * <p>However, the library is free to partition the input, perform the reduction 161 * on the partitions, and then use the combiner function to combine the partial 162 * results to achieve a parallel reduction. (Depending on the specific reduction 163 * operation, this may perform better or worse, depending on the relative cost 164 * of the accumulator and combiner functions.) 165 * 166 * <p>Collectors are designed to be <em>composed</em>; many of the methods 167 * in {@link Collectors} are functions that take a collector and produce 168 * a new collector. For example, given the following collector that computes 169 * the sum of the salaries of a stream of employees: 170 * 171 * <pre>{@code 172 * Collector<Employee, ?, Integer> summingSalaries 173 * = Collectors.summingInt(Employee::getSalary)) 174 * }</pre> 175 * 176 * If we wanted to create a collector to tabulate the sum of salaries by 177 * department, we could reuse the "sum of salaries" logic using 178 * {@link Collectors#groupingBy(Function, Collector)}: 179 * 180 * <pre>{@code 181 * Collector<Employee, ?, Map<Department, Integer>> summingSalariesByDept 182 * = Collectors.groupingBy(Employee::getDepartment, summingSalaries); 183 * }</pre> 184 * 185 * @see Stream#collect(Collector) 186 * @see Collectors 187 * 188 * @param <T> the type of input elements to the reduction operation 189 * @param <A> the mutable accumulation type of the reduction operation (often 190 * hidden as an implementation detail) 191 * @param <R> the result type of the reduction operation 192 * @since 1.8 193 */ 194 public interface Collector<T, A, R> { 195 /** 196 * A function that creates and returns a new mutable result container. 197 * 198 * @return a function which returns a new, mutable result container 199 */ 200 Supplier<A> supplier(); 201 202 /** 203 * A function that creates and returns a new mutable result container, 204 * when applied with an initial capacity. 205 * 206 * @return a function which returns a new, mutable result container 207 */ 208 default IntFunction<A> sizedSupplier() { 209 return ignored -> supplier().get(); 210 } 211 212 /** 213 * A function that folds a value into a mutable result container. 214 * 215 * @return a function which folds a value into a mutable result container 216 */ 217 BiConsumer<A, T> accumulator(); 218 219 /** 220 * A function that accepts two partial results and merges them. The 221 * combiner function may fold state from one argument into the other and 222 * return that, or may return a new result container. 223 * 224 * @return a function which combines two partial results into a combined 225 * result 226 */ 227 BinaryOperator<A> combiner(); 228 229 /** 230 * Perform the final transformation from the intermediate accumulation type 231 * {@code A} to the final result type {@code R}. 232 * 233 * <p>If the characteristic {@code IDENTITY_FINISH} is 234 * set, this function may be presumed to be an identity transform with an 235 * unchecked cast from {@code A} to {@code R}. 236 * 237 * @return a function which transforms the intermediate result to the final 238 * result 239 */ 240 Function<A, R> finisher(); 241 242 /** 243 * Returns a {@code Set} of {@code Collector.Characteristics} indicating 244 * the characteristics of this Collector. This set should be immutable. 245 * 246 * @return an immutable set of collector characteristics 247 */ 248 Set<Characteristics> characteristics(); 249 250 /** 251 * Returns a new {@code Collector} described by the given {@code supplier}, 252 * {@code accumulator}, and {@code combiner} functions. The resulting 253 * {@code Collector} has the {@code Collector.Characteristics.IDENTITY_FINISH} 254 * characteristic. 255 * 256 * @param supplier The supplier function for the new collector 257 * @param accumulator The accumulator function for the new collector 258 * @param combiner The combiner function for the new collector 259 * @param characteristics The collector characteristics for the new 260 * collector 261 * @param <T> The type of input elements for the new collector 262 * @param <R> The type of intermediate accumulation result, and final result, 263 * for the new collector 264 * @throws NullPointerException if any argument is null 265 * @return the new {@code Collector} 266 */ 267 public static<T, R> Collector<T, R, R> of(Supplier<R> supplier, 268 BiConsumer<R, T> accumulator, 269 BinaryOperator<R> combiner, 270 Characteristics... characteristics) { 271 return of(ignored -> supplier.get(), supplier, accumulator, combiner, characteristics); 272 } 273 274 /** 275 * Returns a new {@code Collector} described by the given {@code supplier}, 276 * {@code accumulator}, {@code combiner}, and {@code finisher} functions. 277 * 278 * @param supplier The supplier function for the new collector 279 * @param accumulator The accumulator function for the new collector 280 * @param combiner The combiner function for the new collector 281 * @param finisher The finisher function for the new collector 282 * @param characteristics The collector characteristics for the new 283 * collector 284 * @param <T> The type of input elements for the new collector 285 * @param <A> The intermediate accumulation type of the new collector 286 * @param <R> The final result type of the new collector 287 * @throws NullPointerException if any argument is null 288 * @return the new {@code Collector} 289 */ 290 public static<T, A, R> Collector<T, A, R> of(Supplier<A> supplier, 291 BiConsumer<A, T> accumulator, 292 BinaryOperator<A> combiner, 293 Function<A, R> finisher, 294 Characteristics... characteristics) { 295 return of(ignored -> supplier.get(), supplier, accumulator, combiner, finisher, characteristics); 296 } 297 298 /** 299 * Returns a new {@code Collector} described by the given {@code supplier}, 300 * {@code accumulator}, {@code combiner}, and {@code finisher} functions. 301 * 302 * @param sizedSupplier The sized supplier function for the new collector 303 * @param supplier The supplier function for the new collector 304 * @param accumulator The accumulator function for the new collector 305 * @param combiner The combiner function for the new collector 306 * @param characteristics The collector characteristics for the new 307 * collector 308 * @param <T> The type of input elements for the new collector 309 * @param <A> The intermediate accumulation type of the new collector 310 * @param <R> The final result type of the new collector 311 * @throws NullPointerException if any argument is null 312 * @return the new {@code Collector} 313 */ 314 public static<T, A, R> Collector<T, A, R> of(IntFunction<A> sizedSupplier, 315 Supplier<A> supplier, 316 BiConsumer<A, T> accumulator, 317 BinaryOperator<A> combiner, 318 Characteristics... characteristics) { 319 Objects.requireNonNull(sizedSupplier); 320 Objects.requireNonNull(supplier); 321 Objects.requireNonNull(accumulator); 322 Objects.requireNonNull(combiner); 323 Objects.requireNonNull(characteristics); 324 Set<Characteristics> cs = (characteristics.length == 0) 325 ? Collectors.CH_ID 326 : Collections.unmodifiableSet(EnumSet.of(Collector.Characteristics.IDENTITY_FINISH, 327 characteristics)); 328 return new Collectors.CollectorImpl<>(sizedSupplier, supplier, accumulator, combiner, cs); 329 } 330 331 /** 332 * Returns a new {@code Collector} described by the given {@code supplier}, 333 * {@code accumulator}, {@code combiner}, and {@code finisher} functions. 334 * 335 * @param sizedSupplier The sized supplier function for the new collector 336 * @param supplier The supplier function for the new collector 337 * @param accumulator The accumulator function for the new collector 338 * @param combiner The combiner function for the new collector 339 * @param finisher The finisher function for the new collector 340 * @param characteristics The collector characteristics for the new 341 * collector 342 * @param <T> The type of input elements for the new collector 343 * @param <A> The intermediate accumulation type of the new collector 344 * @param <R> The final result type of the new collector 345 * @throws NullPointerException if any argument is null 346 * @return the new {@code Collector} 347 */ 348 public static<T, A, R> Collector<T, A, R> of(IntFunction<A> sizedSupplier, 349 Supplier<A> supplier, 350 BiConsumer<A, T> accumulator, 351 BinaryOperator<A> combiner, 352 Function<A, R> finisher, 353 Characteristics... characteristics) { 354 Objects.requireNonNull(sizedSupplier); 355 Objects.requireNonNull(supplier); 356 Objects.requireNonNull(accumulator); 357 Objects.requireNonNull(combiner); 358 Objects.requireNonNull(finisher); 359 Objects.requireNonNull(characteristics); 360 Set<Characteristics> cs = Collectors.CH_NOID; 361 if (characteristics.length > 0) { 362 cs = EnumSet.noneOf(Characteristics.class); 363 Collections.addAll(cs, characteristics); 364 cs = Collections.unmodifiableSet(cs); 365 } 366 return new Collectors.CollectorImpl<>(sizedSupplier, supplier, accumulator, combiner, finisher, cs); 367 } 368 369 /** 370 * Characteristics indicating properties of a {@code Collector}, which can 371 * be used to optimize reduction implementations. 372 */ 373 enum Characteristics { 374 /** 375 * Indicates that this collector is <em>concurrent</em>, meaning that 376 * the result container can support the accumulator function being 377 * called concurrently with the same result container from multiple 378 * threads. 379 * 380 * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED}, 381 * then it should only be evaluated concurrently if applied to an 382 * unordered data source. 383 */ 384 CONCURRENT, 385 386 /** 387 * Indicates that the collection operation does not commit to preserving 388 * the encounter order of input elements. (This might be true if the 389 * result container has no intrinsic order, such as a {@link Set}.) 390 */ 391 UNORDERED, 392 393 /** 394 * Indicates that the finisher function is the identity function and 395 * can be elided. If set, it must be the case that an unchecked cast 396 * from A to R will succeed. 397 */ 398 IDENTITY_FINISH 399 } 400 }