1 | /* |
2 | |
3 | Derby - Class org.apache.derby.impl.store.access.sort.ExternalSortFactory |
4 | |
5 | Copyright 1997, 2004 The Apache Software Foundation or its licensors, as applicable. |
6 | |
7 | Licensed under the Apache License, Version 2.0 (the "License"); |
8 | you may not use this file except in compliance with the License. |
9 | You may obtain a copy of the License at |
10 | |
11 | http://www.apache.org/licenses/LICENSE-2.0 |
12 | |
13 | Unless required by applicable law or agreed to in writing, software |
14 | distributed under the License is distributed on an "AS IS" BASIS, |
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | See the License for the specific language governing permissions and |
17 | limitations under the License. |
18 | |
19 | */ |
20 | |
21 | package org.apache.derby.impl.store.access.sort; |
22 | |
23 | import java.util.Properties; |
24 | |
25 | import org.apache.derby.iapi.services.monitor.ModuleControl; |
26 | import org.apache.derby.iapi.services.monitor.ModuleSupportable; |
27 | import org.apache.derby.iapi.services.monitor.Monitor; |
28 | import org.apache.derby.iapi.services.property.PropertyUtil; |
29 | import org.apache.derby.iapi.services.sanity.SanityManager; |
30 | |
31 | import org.apache.derby.iapi.error.StandardException; |
32 | |
33 | import org.apache.derby.iapi.store.access.conglomerate.MethodFactory; |
34 | import org.apache.derby.iapi.store.access.conglomerate.Sort; |
35 | import org.apache.derby.iapi.store.access.conglomerate.SortFactory; |
36 | |
37 | import org.apache.derby.iapi.store.access.SortObserver; |
38 | import org.apache.derby.iapi.store.access.SortCostController; |
39 | import org.apache.derby.iapi.store.access.ColumnOrdering; |
40 | import org.apache.derby.iapi.store.access.TransactionController; |
41 | |
42 | import org.apache.derby.iapi.types.DataValueDescriptor; |
43 | |
44 | import org.apache.derby.iapi.services.uuid.UUIDFactory; |
45 | |
46 | import org.apache.derby.catalog.UUID; |
47 | |
48 | /** |
49 | |
50 | **/ |
51 | |
52 | public class ExternalSortFactory implements |
53 | SortFactory, ModuleControl, ModuleSupportable, SortCostController |
54 | { |
55 | |
56 | private boolean userSpecified; // did the user specify sortBufferMax |
57 | private int defaultSortBufferMax; |
58 | private int sortBufferMax; |
59 | |
60 | private static final String IMPLEMENTATIONID = "sort external"; |
61 | private static final String FORMATUUIDSTRING = "D2976090-D9F5-11d0-B54D-00A024BF8879"; |
62 | private UUID formatUUID = null; |
63 | private static final int DEFAULT_SORTBUFFERMAX = 1024; |
64 | private static final int MINIMUM_SORTBUFFERMAX = 4; |
65 | |
66 | protected static final int DEFAULT_MEM_USE = 1024*1024; // aim for about 1Meg |
67 | // how many sort runs to combined into a larger sort run |
68 | protected static final int DEFAULT_MAX_MERGE_RUN = 1024; |
69 | |
70 | // sizeof Node + reference to Node + 12 bytes tax |
71 | private static final int SORT_ROW_OVERHEAD = 8*4+12; |
72 | |
73 | |
74 | /* |
75 | ** Methods of MethodFactory |
76 | */ |
77 | |
78 | /** |
79 | There are no default properties for the external sort.. |
80 | @see MethodFactory#defaultProperties |
81 | **/ |
82 | public Properties defaultProperties() |
83 | { |
84 | return new Properties(); |
85 | } |
86 | |
87 | /** |
88 | @see MethodFactory#supportsImplementation |
89 | **/ |
90 | public boolean supportsImplementation(String implementationId) |
91 | { |
92 | return implementationId.equals(IMPLEMENTATIONID); |
93 | } |
94 | |
95 | /** |
96 | @see MethodFactory#primaryImplementationType |
97 | **/ |
98 | public String primaryImplementationType() |
99 | { |
100 | return IMPLEMENTATIONID; |
101 | } |
102 | |
103 | /** |
104 | @see MethodFactory#supportsFormat |
105 | **/ |
106 | public boolean supportsFormat(UUID formatid) |
107 | { |
108 | return formatid.equals(formatUUID); |
109 | } |
110 | |
111 | /** |
112 | @see MethodFactory#primaryFormat |
113 | **/ |
114 | public UUID primaryFormat() |
115 | { |
116 | return formatUUID; |
117 | } |
118 | |
119 | /* |
120 | ** Methods of SortFactory |
121 | */ |
122 | |
123 | /** |
124 | Create a sort. |
125 | This method could choose among different sort options, |
126 | depending on the properties etc., but currently it always |
127 | returns a merge sort. |
128 | @see SortFactory#createSort |
129 | **/ |
130 | public Sort createSort( |
131 | TransactionController tran, |
132 | int segment, |
133 | Properties implParameters, |
134 | DataValueDescriptor[] template, |
135 | ColumnOrdering columnOrdering[], |
136 | SortObserver sortObserver, |
137 | boolean alreadyInOrder, |
138 | long estimatedRows, |
139 | int estimatedRowSize) |
140 | throws StandardException |
141 | { |
142 | MergeSort sort = new MergeSort(); |
143 | |
144 | // RESOLVE - mikem change this to use estimatedRows and |
145 | // estimatedRowSize to come up with a smarter number for sortBufferMax |
146 | // than a fixed number of rows. At least 2 possibilities: |
147 | // 1) add sortBufferMaxMem which would be the amount of memory |
148 | // the sorter could use, and then just pick the number of |
149 | // rows as (sortBufferMaxMem / (estimatedRows * estimatedRowSize) |
150 | // 2) add sortBufferUsePercentFree. This would be how much of |
151 | // the current free memory can the current sort use. |
152 | // |
153 | |
154 | if (!userSpecified) |
155 | { |
156 | // derby.storage.sortBufferMax is not specified by the |
157 | // user, use default or try to figure out a reasonable sort |
158 | // size. |
159 | |
160 | // if we have some idea on row size, set sort approx 1 meg of |
161 | // memory sort. |
162 | if (estimatedRowSize > 0) |
163 | { |
164 | // |
165 | // for each column, there is a reference from the key array and |
166 | // the 4 bytes reference to the column object plus 12 bytes |
167 | // tax on the column object |
168 | // for each row, SORT_ROW_OVERHEAD is the Node and 4 bytes to |
169 | // point to the column array and 4 for alignment |
170 | // |
171 | estimatedRowSize += SORT_ROW_OVERHEAD + |
172 | (template.length*(4+12)) + 8; |
173 | sortBufferMax = DEFAULT_MEM_USE/estimatedRowSize; |
174 | } |
175 | else |
176 | { |
177 | sortBufferMax = defaultSortBufferMax; |
178 | } |
179 | |
180 | // if there are barely more rows than sortBufferMax, use 2 |
181 | // smaller runs of similar size instead of one larger run |
182 | // |
183 | // 10% slush is added to estimated Rows to catch the case where |
184 | // estimated rows underestimate the actual number of rows by 10%. |
185 | // |
186 | if (estimatedRows > sortBufferMax && |
187 | (estimatedRows*1.1) < sortBufferMax*2) |
188 | sortBufferMax = (int)(estimatedRows/2 + estimatedRows/10); |
189 | |
190 | // Make sure it is at least the minimum sort buffer size |
191 | if (sortBufferMax < MINIMUM_SORTBUFFERMAX) |
192 | sortBufferMax = MINIMUM_SORTBUFFERMAX; |
193 | } |
194 | else |
195 | { |
196 | // if user specified derby.storage.sortBufferMax, use it. |
197 | sortBufferMax = defaultSortBufferMax; |
198 | } |
199 | |
200 | if (SanityManager.DEBUG) |
201 | { |
202 | if (SanityManager.DEBUG_ON("SortTuning")) |
203 | { |
204 | SanityManager.DEBUG("SortTuning", |
205 | "sortBufferMax = " + sortBufferMax + |
206 | " estimatedRows = " + estimatedRows + |
207 | " estimatedRowSize = " + estimatedRowSize + |
208 | " defaultSortBufferMax = " + defaultSortBufferMax); |
209 | } |
210 | } |
211 | |
212 | sort.initialize( |
213 | template, columnOrdering, sortObserver, |
214 | alreadyInOrder, estimatedRows, sortBufferMax); |
215 | return sort; |
216 | } |
217 | |
218 | /** |
219 | * Return an open SortCostController. |
220 | * <p> |
221 | * Return an open SortCostController which can be used to ask about |
222 | * the estimated costs of SortController() operations. |
223 | * <p> |
224 | * |
225 | * @return The open SortCostController. |
226 | * |
227 | * @exception StandardException Standard exception policy. |
228 | * |
229 | * @see SortCostController |
230 | **/ |
231 | public SortCostController openSortCostController() |
232 | throws StandardException |
233 | { |
234 | return(this); |
235 | } |
236 | |
237 | /* |
238 | ** Methods of SortCostController |
239 | */ |
240 | |
241 | public void close() |
242 | { |
243 | // nothing to do. |
244 | } |
245 | |
246 | /** |
247 | * Short one line description of routine. |
248 | * <p> |
249 | * The sort algorithm is a N * log(N) algorithm. The following numbers |
250 | * on a PII, 400 MHZ machine, jdk117 with jit, insane.zip. This test |
251 | * is a simple "select * from table order by first_int_column. I then |
252 | * subtracted the time it takes to do "select * from table" from the |
253 | * result. |
254 | * |
255 | * number of rows elaspsed time in seconds |
256 | * -------------- ----------------------------- |
257 | * 1000 0.20 |
258 | * 10000 10.5 |
259 | * 100000 80.0 |
260 | * |
261 | * We assume that the formula for sort performance is of the form: |
262 | * performance = K * N * log(N). Solving the equation for the 1000 |
263 | * and 100000 case we come up with: |
264 | * |
265 | * performance = 1 + 0.08 N ln(n) |
266 | * |
267 | * NOTE: Apparently, these measurements were done on a faster machine |
268 | * than was used for other performance measurements used by the optimizer. |
269 | * Experiments show that the 0.8 multiplier is off by a factor of 4 |
270 | * with respect to other measurements (such as the time it takes to |
271 | * scan a conglomerate). I am correcting the formula to use 0.32 |
272 | * rather than 0.08. |
273 | * |
274 | * - Jeff |
275 | * |
276 | * <p> |
277 | * RESOLVE (mikem) - this formula is very crude at the moment and will be |
278 | * refined later. known problems: |
279 | * 1) internal vs. external sort - we know that the performance of sort |
280 | * is discontinuous when we go from an internal to an external sort. |
281 | * A better model is probably a different set of contants for internal |
282 | * vs. external sort and some way to guess when this is going to happen. |
283 | * 2) current row size is never considered but is critical to performance. |
284 | * 3) estimatedExportRows is not used. This is a critical number to know |
285 | * if an internal vs. an external sort will happen. |
286 | * |
287 | * <p> |
288 | * |
289 | * @return The identifier to be used to open the conglomerate later. |
290 | * |
291 | * |
292 | * @exception StandardException Standard exception policy. |
293 | **/ |
294 | public double getSortCost( |
295 | DataValueDescriptor[] template, |
296 | ColumnOrdering columnOrdering[], |
297 | boolean alreadyInOrder, |
298 | long estimatedInputRows, |
299 | long estimatedExportRows, |
300 | int estimatedRowSize) |
301 | throws StandardException |
302 | { |
303 | /* Avoid taking the log of 0 */ |
304 | if (estimatedInputRows == 0) |
305 | return 0.0; |
306 | |
307 | // RESOLVE - come up with some real benchmark. For now the cost |
308 | // of sort is 3 times the cost of scanning the data. |
309 | |
310 | if (SanityManager.DEBUG) |
311 | { |
312 | SanityManager.ASSERT(estimatedInputRows >= 0); |
313 | SanityManager.ASSERT(estimatedExportRows >= 0); |
314 | } |
315 | |
316 | double ret_val = |
317 | 1 + |
318 | ((0.32) * (estimatedInputRows) * Math.log(estimatedInputRows)); |
319 | |
320 | return(ret_val); |
321 | } |
322 | |
323 | /* |
324 | ** Methods of ModuleControl. |
325 | */ |
326 | |
327 | public boolean canSupport(Properties startParams) { |
328 | |
329 | if (startParams == null) |
330 | return false; |
331 | |
332 | String impl = startParams.getProperty("derby.access.Conglomerate.type"); |
333 | if (impl == null) |
334 | return false; |
335 | |
336 | return supportsImplementation(impl); |
337 | } |
338 | |
339 | |
340 | public void boot(boolean create, Properties startParams) |
341 | throws StandardException |
342 | { |
343 | // Find the UUID factory. |
344 | UUIDFactory uuidFactory = Monitor.getMonitor().getUUIDFactory(); |
345 | |
346 | // Make a UUID that identifies this sort's format. |
347 | formatUUID = uuidFactory.recreateUUID(FORMATUUIDSTRING); |
348 | |
349 | // See if there's a new maximum sort buffer size. |
350 | defaultSortBufferMax = PropertyUtil.getSystemInt("derby.storage.sortBufferMax", |
351 | 0, Integer.MAX_VALUE, 0); |
352 | |
353 | // if defaultSortBufferMax is 0, the user did not specify |
354 | // sortBufferMax, then just set it to DEFAULT_SORTBUFFERMAX. |
355 | // if defaultSortBufferMax is not 0, the user specified sortBufferMax, |
356 | // do not override it. |
357 | if (defaultSortBufferMax == 0) |
358 | { |
359 | userSpecified = false; |
360 | defaultSortBufferMax = DEFAULT_SORTBUFFERMAX; |
361 | } |
362 | else |
363 | { |
364 | userSpecified = true; |
365 | if (defaultSortBufferMax < MINIMUM_SORTBUFFERMAX) |
366 | defaultSortBufferMax = MINIMUM_SORTBUFFERMAX; |
367 | } |
368 | |
369 | } |
370 | |
371 | public void stop() |
372 | { |
373 | } |
374 | |
375 | } |