cf/libucw

   1 # Configuration of the UCW library and related utilities (included by cf/sherlock)
   2
   3 ######## Memory Mapped Access to Files ##########################################
   4
   5 # Whenever you specify 0 for I/O buffer size, memory mapping is used instead.
   6 FBMMap {
   7
   8 # Map this many bytes at once (needs to be a multiple of CPU page size)
   9 WindowSize              1M
  10
  11 # When a file needs to be extended, grow it by this many bytes (>= page size)
  12 ExtendSize              1M
  13
  14 }
  15
  16 ######## Direct Streamed I/O on Files ###########################################
  17
  18 FBDirect {
  19
  20 # Debug: Cheat by turning off O_DIRECT
  21 #Cheat                  1
  22
  23 }
  24
  25 ######## Atomic Multi-Threaded I/O on Files #####################################
  26
  27 FBAtomic {
  28
  29 # Enable tracing
  30 #Trace                  1
  31
  32 }
  33
  34 ######## Parametrized I/O on Files ##############################################
  35
  36 FBParam {
  37
  38 Defaults {
  39
  40 # Access type (std|direct|mmap).
  41 Type                    std
  42
  43 # Size of I/O buffer. Something of the order of megabytes for fast disks is recommended for direct I/O.
  44 BufSize                 64K
  45
  46 # Optimize for mixed forward/backward reading (standard I/O only)
  47 KeepBackBuf             0
  48
  49 # Perform read-ahead (direct I/O only)
  50 ReadAhead               1
  51
  52 # Maximum number of write-back requests queued (direct I/O only)
  53 WriteBack               1
  54
  55 }
  56
  57 }
  58
  59 ######## Temporary files ########################################################
  60
  61 Tempfiles {
  62
  63 # By default, we use the system's default temporary directory ($TMPDIR or /tmp),
  64 # but sometimes it is better to store the temporary files in the local tree.
  65 Dir                     tmp
  66
  67 # Prefix of temporary file names
  68 Prefix                  temp-
  69
  70 # By default, we append a random number to Prefix to get a temporary file name.
  71 # If Prefix points to a directory that is not writable by malicious users,
  72 # we can be less careful and use more consistent names of temporary files
  73 # formed by adding "pid(-tid)-counter" instead.
  74 PublicDir               0
  75
  76 }
  77
  78 ######## Threads ################################################################
  79
  80 Threads {
  81
  82 # Default thread stack size
  83 DefaultStackSize        64K
  84
  85 }
  86
  87 ######## Sorter #################################################################
  88
  89 Sorter {
  90
  91 # Trace sorting (1=basic statistics, 2=more stats, 3 and more for debugging)
  92 Trace                   2
  93
  94 # Trace array sorting (internal sorters)
  95 TraceArray              0
  96
  97 # How much memory is the sorter allowed to use
  98 SortBuffer              4M
  99
 100 # File access used by the sorter (see FBParam section for details)
 101 FileAccess              std 256K
 102
 103 # Use a different file access method for small inputs (less than the specified size)
 104 SmallFileAccess         std 64K
 105 SmallInput              64M
 106
 107 # Min-/Maximum number of bits to use in the external radix-sort (beware, we will open
 108 # 1+2^this files and require a stream buffer for each of them; however, while we are
 109 # doing that, the sort buffer is not allocated). Set both to zero to disable radix-sorting.
 110 MinRadixBits            2
 111 MaxRadixBits            4
 112
 113 # The same for multi-way merging. The memory requirements are also the same,
 114 # but please keep in mind that this can create lots of SortBuffer-sized files,
 115 # so it is probably better to keep it disabled if you have a small SortBuffer.
 116 MinMultiwayBits         2
 117 MaxMultiwayBits         4
 118
 119 # If we did not use radix-sorter to the full width, we still might add some more
 120 # bits to the width to get chunks which are even smaller than SortBuffer, because
 121 # it can speed up internal sorting later. However, we also want to avoid small
 122 # files, so we add only a little.
 123 AddRadixBits            2
 124
 125 # Number of threads used for sorting (0=disable threading)
 126 Threads                 0
 127
 128 # Minimum size of input (in bytes) to consider multi-threaded internal sorting
 129 ThreadThreshold         1M
 130
 131 # Chunks smaller than ThreadThreshold are sorted by a sequential algorithm, but
 132 # if they are at least of the following size, different chunks are sorted in
 133 # parallel. There is a slight space penalty for setting up the parallel process,
 134 # so better avoid setting this number too small.
 135 ThreadChunk             256
 136
 137 # Internal radix-sort stops at this size and switches to QuickSort (must be >0)
 138 RadixThreshold          4K
 139
 140 # Debugging switches (see the source)
 141 Debug                   0
 142
 143 }
 144
 145 ######## URL processing #########################################################
 146
 147 URL {
 148
 149 # Ignore spaces at the start/end of a URL
 150 IgnoreSpaces            1
 151
 152 # Ignore underflows in relative paths (/../ from root)
 153 IgnoreUnderflow         1
 154
 155 # Some URLs with many repeated components are filtered out to avoid infinite
 156 # URLs (e.g. http://czech.recoder.cz/win/iso/win/iso/file.html, or
 157 # http://a.com/?a=b&a=b&a=b, ...).
 158 # The URL is split to components divided by any of the specified separators.
 159 # Then the separators are forgotten and the components between them are
 160 # examined.
 161 ComponentSeparators     /&?
 162
 163 # A URL is filtered out if it contains a sequence of at most MaxRepeatLength
 164 # consecutive components that is repeated more than MinRepeatCount
 165 # times.  Default values are high MinRepeatCount and low MaxRepeatLength, so the
 166 # mechanism is disabled.
 167 MinRepeatCount          4
 168 MaxRepeatLength         4
 169
 170 # Maximum number of occurrences of a single component in the entire URL (possibly interleaved
 171 # by different components). The detector is disabled by default.
 172 MaxOccurences           4
 173
 174 }
 175
 176 ######## Logging ################################################################
 177
 178 Logging {
 179
 180 # In this section, you can define various logging streams which can be referred to by other sections.
 181
 182 # Stream {
 183 #       # The name of the stream
 184 #       Name            test-log
 185 #
 186 #       # To log messages to a file, specify the name of the file here.
 187 #       # Escape sequences for current date and time as described in strftime(3) can be used.
 188 #       FileName        log/test-%Y%m%d
 189 #
 190 #       # Instead of a file, a syslog facility can be specified. See syslog(3) for an explanation.
 191 #       SyslogFacility  daemon
 192 #
 193 #       # You can request that syslog includes a process ID in each message. Due to inflexibility
 194 #       # of the syslog protocol, all syslog streams active at a moment must agree on this setting.
 195 #       # (default: 0)
 196 #       SyslogPID       1
 197 #
 198 #       # When logging to files, timestamps with microsecond precision can be requested. (default: 0)
 199 #       Microseconds    1
 200 #
 201 #       # Messages logged to this stream can be restricted to a subset of severity levels.
 202 #       # Available levels are: debug info warn error info_r warn_r error_r fatal.
 203 #       # This configuration item is a bitmap with a default of "all", so we need the ":reset" operator.
 204 #       Levels:reset    info warn error fatal
 205 #
 206 #       # Similarly, messages can be restricted to a subset of message types. The types are
 207 #       # specific for each program. This configuration item is a list of type names; by default
 208 #       # it is empty, which is equivalent to all types being enabled.
 209 #       Types:reset     default foo
 210 #
 211 #       # Should the message types be logged? They usually do not carry much useful
 212 #       # information for the viewer of the log, so they are not included by default,
 213 #       # but you might want to see them when tuning the Types setting. (default: 0)
 214 #       ShowTypes       1
 215 #
 216 #       # If an error occurs when logging a message to this stream, the program normally
 217 #       # logs a special error message to the other streams and continues running. You can
 218 #       # however request to exit the program in such cases, so that the log files are
 219 #       # guaranteed to be complete. (default: 0)
 220 #       ErrorsFatal     1
 221 #
 222 #       # Let stderr of the program point to this file-based log_stream (default: 0)
 223 #       StdErrFollows   1
 224 #
 225 #       # Some events are logworthy, but they could happen too frequently and flood the log.
 226 #       # You can avoid the flooding by setting up a rate limiter for a specific subset of
 227 #       # message types. If several limiters match the type of a message, only the last one applies.
 228 #       Limit {
 229 #               # A list of message types (default: empty = all types)
 230 #               Types           default foo
 231 #
 232 #               # The maximum allowed sustained rate (messages/second, may be fractional)
 233 #               Rate            1
 234 #
 235 #               # Maximum length of a burst temporarily exceeding the rate (default: try to guess)
 236 #               Burst           2
 237 #       }
 238 #
 239 #       # The messages that have passed the filters and limiters can be forwarded to other
 240 #       # log streams. Logging loops are not healthy for your program :)  (a list of stream names)
 241 #       Substream       another-stream
 242 # }
 243
 244 }