1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 """
40 Provides an extension to split up large files in staging directories.
41
42 When this extension is executed, it will look through the configured Cedar
43 Backup staging directory for files exceeding a specified size limit, and split
44 them down into smaller files using the 'split' utility. Any directory which
45 has already been split (as indicated by the C{cback.split} file) will be
46 ignored.
47
48 This extension requires a new configuration section <split> and is intended
49 to be run immediately after the standard stage action or immediately before the
50 standard store action. Aside from its own configuration, it requires the
51 options and staging configuration sections in the standard Cedar Backup
52 configuration file.
53
54 @author: Kenneth J. Pronovici <pronovic@ieee.org>
55 """
56
57
58
59
60
61
62 import os
63 import re
64 import logging
65
66
67 from CedarBackup2.util import resolveCommand, executeCommand, changeOwnership
68 from CedarBackup2.xmlutil import createInputDom, addContainerNode
69 from CedarBackup2.xmlutil import readFirstChild
70 from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles
71 from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode
72
73
74
75
76
77
# Module-level logger; the name places it under the Cedar Backup extension
# logging hierarchy.
logger = logging.getLogger("CedarBackup2.log.extend.split")

# Command (as an argument list) that is resolved and executed to split a file.
SPLIT_COMMAND = ["split"]

# Name of the indicator file that marks a daily directory as already split.
SPLIT_INDICATOR = "cback.split"
class SplitConfig(object):

    """
    Class representing split configuration.

    Split configuration is used for splitting staging directories.

    The following restrictions exist on data in this class:

       - The size limit must be a C{ByteQuantity} (or C{None})
       - The split size must be a C{ByteQuantity} (or C{None})

    @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
    """

    def __init__(self, sizeLimit=None, splitSize=None):
        """
        Constructor for the C{SplitConfig} class.

        @param sizeLimit: Size limit of the files, as a C{ByteQuantity}
        @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity}

        @raise ValueError: If one of the values is invalid.
        """
        self._sizeLimit = None
        self._splitSize = None
        # Assign through the properties so that the setters validate the values.
        self.sizeLimit = sizeLimit
        self.splitSize = splitSize

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Definition of the comparison operator for this class.

        Instances are ordered by size limit first and by split size second.
        Any instance compares greater than C{None}.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        if self.sizeLimit != other.sizeLimit:
            if self.sizeLimit < other.sizeLimit:
                return -1
            else:
                return 1
        if self.splitSize != other.splitSize:
            if self.splitSize < other.splitSize:
                return -1
            else:
                return 1
        return 0

    def _setSizeLimit(self, value):
        """
        Property target used to set the size limit.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is None:
            self._sizeLimit = None
        else:
            if not isinstance(value, ByteQuantity):
                raise ValueError("Value must be a C{ByteQuantity} object.")
            self._sizeLimit = value

    def _getSizeLimit(self):
        """
        Property target used to get the size limit.
        """
        return self._sizeLimit

    def _setSplitSize(self, value):
        """
        Property target used to set the split size.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is None:
            self._splitSize = None
        else:
            if not isinstance(value, ByteQuantity):
                raise ValueError("Value must be a C{ByteQuantity} object.")
            self._splitSize = value

    def _getSplitSize(self):
        """
        Property target used to get the split size.
        """
        return self._splitSize

    sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
    splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
190
class LocalConfig(object):

    """
    Class representing this extension's configuration document.

    This is not a general-purpose configuration object like the main Cedar
    Backup configuration object.  Instead, it just knows how to parse and emit
    split-specific configuration values.  Third parties who need to read and
    write configuration related to this extension should access it through the
    constructor, C{validate} and C{addConfig} methods.

    @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
    """

    def __init__(self, xmlData=None, xmlPath=None, validate=True):
        """
        Initializes a configuration object.

        If you initialize the object without passing either C{xmlData} or
        C{xmlPath} then configuration will be empty and will be invalid until it
        is filled in properly.

        No reference to the original XML data or original path is saved off by
        this class.  Once the data has been parsed (successfully or not) this
        original information is discarded.

        Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
        method will be called (with its default arguments) against configuration
        after successfully parsing any passed-in XML.  Keep in mind that even if
        C{validate} is C{False}, it might not be possible to parse the passed-in
        XML document if lower-level validations fail.

        @note: It is strongly suggested that the C{validate} option always be set
        to C{True} (the default) unless there is a specific need to read in
        invalid configuration from disk.

        @param xmlData: XML data representing configuration.
        @type xmlData: String data.

        @param xmlPath: Path to an XML file on disk.
        @type xmlPath: Absolute path to a file on disk.

        @param validate: Validate the document after parsing it.
        @type validate: Boolean true/false.

        @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
        @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
        @raise ValueError: If the parsed configuration document is not valid.
        """
        self._split = None
        self.split = None
        if xmlData is not None and xmlPath is not None:
            raise ValueError("Use either xmlData or xmlPath, but not both.")
        if xmlData is not None:
            self._parseXmlData(xmlData)
            if validate:
                self.validate()
        elif xmlPath is not None:
            # Close the file explicitly rather than relying on garbage
            # collection to release the handle.
            xmlFile = open(xmlPath)
            try:
                xmlData = xmlFile.read()
            finally:
                xmlFile.close()
            self._parseXmlData(xmlData)
            if validate:
                self.validate()

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "LocalConfig(%s)" % (self.split,)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Definition of the comparison operator for this class.

        Instances are ordered by their C{split} configuration.  Any instance
        compares greater than C{None}.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        if self.split != other.split:
            if self.split < other.split:
                return -1
            else:
                return 1
        return 0

    def _setSplit(self, value):
        """
        Property target used to set the split configuration value.
        If not C{None}, the value must be a C{SplitConfig} object.
        @raise ValueError: If the value is not a C{SplitConfig}
        """
        if value is None:
            self._split = None
        else:
            if not isinstance(value, SplitConfig):
                raise ValueError("Value must be a C{SplitConfig} object.")
            self._split = value

    def _getSplit(self):
        """
        Property target used to get the split configuration value.
        """
        return self._split

    split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

    def validate(self):
        """
        Validates configuration represented by the object.

        Split configuration must be filled in.  Within that, both the size limit
        and split size must be filled in.

        @raise ValueError: If one of the validations fails.
        """
        if self.split is None:
            raise ValueError("Split section is required.")
        if self.split.sizeLimit is None:
            raise ValueError("Size limit must be set.")
        if self.split.splitSize is None:
            raise ValueError("Split size must be set.")

    def addConfig(self, xmlDom, parentNode):
        """
        Adds a <split> configuration section as the next child of a parent.

        Third parties should use this function to write configuration related to
        this extension.  Nothing is written when no split configuration is set.

        We add the following fields to the document::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param xmlDom: DOM tree as from C{impl.createDocument()}.
        @param parentNode: Parent that the section should be appended to.
        """
        if self.split is not None:
            sectionNode = addContainerNode(xmlDom, parentNode, "split")
            addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
            addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

    def _parseXmlData(self, xmlData):
        """
        Internal method to parse an XML string into the object.

        This method parses the XML document into a DOM tree and then calls a
        static method to parse the split configuration section.

        @param xmlData: XML data to be parsed
        @type xmlData: String data

        @raise ValueError: If the XML cannot be successfully parsed.
        """
        # Only the parent node is needed; the DOM object itself is not used.
        (_, parentNode) = createInputDom(xmlData)
        self._split = LocalConfig._parseSplit(parentNode)

    @staticmethod
    def _parseSplit(parent):
        """
        Parses a split configuration section.

        We read the following individual fields::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param parent: Parent node to search beneath.

        @return: C{SplitConfig} object or C{None} if the section does not exist.
        @raise ValueError: If some filled-in value is invalid.
        """
        split = None
        section = readFirstChild(parent, "split")
        if section is not None:
            split = SplitConfig()
            split.sizeLimit = readByteQuantity(section, "size_limit")
            split.splitSize = readByteQuantity(section, "split_size")
        return split
383
384
385
386
387
388
389
390
391
392
393 -def executeAction(configPath, options, config):
419
420
421
422
423
424
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
    """
    Splits large files in a daily staging directory.

    The candidate files are whatever C{getBackupFiles(dailyDir)} returns
    (presumably indicator files such as C{"cback.store"} and C{"cback.stage"}
    are excluded there; confirm against that helper).  Each candidate whose
    size on disk is strictly greater than C{sizeLimit.bytes} is split via
    L{_splitFile}, and the source file is removed once it has been split.

    @param dailyDir: Daily directory to split
    @param sizeLimit: Size limit; its C{bytes} attribute is compared against each file size
    @param splitSize: Split size; passed through to L{_splitFile}
    @param backupUser: User that target files should be owned by
    @param backupGroup: Group that target files should be owned by

    @raise ValueError: If a file to be split does not exist (from L{_splitFile}).
    @raise IOError: If there is a problem splitting or removing a file (from L{_splitFile}).
    """
    logger.debug("Begin splitting contents of [%s].", dailyDir)
    for path in getBackupFiles(dailyDir):
        size = float(os.stat(path).st_size)
        if size > sizeLimit.bytes:
            _splitFile(path, splitSize, backupUser, backupGroup, removeSource=True)
    logger.debug("Completed splitting contents of [%s].", dailyDir)
449
450
451
452
453
454
def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
    """
    Splits the source file into chunks of the indicated size.

    The chunks are written next to the source file and are named
    C{<filename>_NNNNN}, where C{NNNNN} is a zero-padded, five-digit numeric
    suffix starting at C{00000}.  The split files will be owned by the
    indicated backup user and group.  If C{removeSource} is C{True}, then the
    source file will be removed after it is successfully split.

    The process working directory is changed to the source file's directory
    while the split command runs, and is always restored before returning.

    @param sourcePath: Absolute path of the source file to split
    @param splitSize: Size of each chunk; its C{bytes} attribute is truncated to an integer
    @param backupUser: User that target files should be owned by
    @param backupGroup: Group that target files should be owned by
    @param removeSource: Indicates whether to remove the source file

    @raise ValueError: If the source path does not exist.
    @raise IOError: If there is a problem accessing, splitting or removing the source file.
    """
    cwd = os.getcwd()
    try:
        if not os.path.exists(sourcePath):
            raise ValueError("Source path [%s] does not exist." % sourcePath)
        dirname = os.path.dirname(sourcePath)
        filename = os.path.basename(sourcePath)
        prefix = "%s_" % filename
        chunkBytes = int(splitSize.bytes)
        # Run from the source directory so the chunks land beside the source.
        os.chdir(dirname)
        command = resolveCommand(SPLIT_COMMAND)
        args = ["--verbose", "--numeric-suffixes", "--suffix-length=5",
                "--bytes=%d" % chunkBytes, filename, prefix]
        (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
        if result != 0:
            raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
        if not output:
            # Without any --verbose output there is no way to tell what was created.
            raise IOError("Unable to parse output from split command.")
        # The last line of output names the final chunk.  The prefix is
        # escaped because file names may contain regex metacharacters.
        pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
        match = pattern.search(output[-1])
        if match is None:
            raise IOError("Unable to parse output from split command.")
        lastIndex = int(match.group(3).strip())
        # Suffixes start at zero, so the final chunk's suffix is itself a
        # valid index and must be included in the loop.
        for index in range(0, lastIndex + 1):
            path = "%s%05d" % (prefix, index)
            if not os.path.exists(path):
                raise IOError("After call to split, expected file [%s] does not exist." % path)
            changeOwnership(path, backupUser, backupGroup)
        if removeSource and os.path.exists(sourcePath):
            try:
                os.remove(sourcePath)
            except (IOError, OSError):
                raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
            logger.debug("Completed removing old file [%s].", sourcePath)
    finally:
        os.chdir(cwd)
504