public void
Execute( )
{
    // Watch the target share and classify changed files until the user presses Esc.
    using (FileSystemWatcher watcher = new FileSystemWatcher())
    {
        // FIX: the published text used curly quotes and dropped the backslash
        // ("d:Share"); restore a real verbatim path literal.
        // NOTE(review): confirm the intended share path.
        watcher.Path = @"d:\Share";
        watcher.IncludeSubdirectories = true;
        watcher.Filter = "*.*";

        // Subscribe to every notification type that can indicate a file
        // needs (re)classification.
        watcher.NotifyFilter = NotifyFilters.FileName |
                               NotifyFilters.DirectoryName |
                               NotifyFilters.Attributes |
                               NotifyFilters.Size |
                               NotifyFilters.LastWrite |
                               NotifyFilters.CreationTime |
                               NotifyFilters.Security;

        // Add event handlers. Created and Changed share a handler because both
        // simply queue the file for later processing.
        watcher.Changed += new FileSystemEventHandler(OnChanged);
        watcher.Created += new FileSystemEventHandler(OnChanged);
        watcher.Renamed += new RenamedEventHandler(OnRenamed);

        // Begin watching.
        watcher.EnableRaisingEvents = true;

        // Pump until the user quits, periodically draining the work queues.
        Console.WriteLine("Press 'Escape (Esc)' to quit.");
        while (true)
        {
            if (Console.KeyAvailable)
            {
                ConsoleKeyInfo key = Console.ReadKey();
                if (key.Key == ConsoleKey.Escape)
                {
                    break;
                }
            }

            // Wait for some time, then classify files that have aged past the
            // update threshold and prune the recently-classified cache.
            Thread.Sleep(1000);
            ProcessFilesToBeClassified();
            PruneClassifiedFilesCache();
        }
    }
    // FIX: the method's closing brace was lost to the surrounding article text.
}
2 - Use a timeout to avoid transitional changes, such as Microsoft Word saving a temporary file.
There are some things to note about the FileSystemWatcher class, mentioned here:
http://weblogs.asp.net/ashben/archive/2003/10/14/31773.aspx
, that will impact the design of our event handlers. The most crucial behavior for us is that multiple events can be raised for a single action. A similar scenario happens when a user saves the file frequently while working on it. We don't want to classify the file each time the user saves it, or each time we are notified by the watcher, so we don't immediately classify a file when the watcher signals us. Instead we put the file in a "to be classified" list. If the file is already in the list we update the timestamp of the file and move it to the end of the list.
private static void OnChanged(object source, FileSystemEventArgs e)
{
    // The watcher can raise several events for one logical save, so we never
    // classify here; we only (re)queue the file. Execute() drains the queue.
    // FIX: the original referenced an undefined local 'fullPath' (it comes from
    // the event args) and an undefined 'classificationAttempted' flag; a watcher
    // notification is not a classification attempt, so AttemptCount is not
    // incremented here. Also restored the ')' dropped from AddLast(fileitem).
    // NOTE(review): watcher events arrive on thread-pool threads while Execute()
    // reads the same list on the main thread -- confirm external synchronization.
    string fullPath = e.FullPath;
    bool found = false;
    foreach (FileItem fileitem in filesToBeClassified)
    {
        // Ordinal comparison is the correct non-linguistic choice for paths.
        if (fullPath.Equals(fileitem.FullPath, StringComparison.OrdinalIgnoreCase))
        {
            found = true;
            // Restart the "age" clock and move the item to the end of the list
            // so the oldest entries stay at the front.
            // Note: we break immediately after mutating the list, so the
            // enumerator is never advanced over a modified collection.
            filesToBeClassified.Remove(fileitem);
            fileitem.TimeStamp = DateTime.UtcNow;
            filesToBeClassified.AddLast(fileitem);
            break;
        }
    }
    if (!found)
    {
        // First notification for this path: queue a fresh item.
        FileItem file = new FileItem();
        file.FullPath = fullPath;
        file.TimeStamp = DateTime.UtcNow;
        file.AttemptCount = 0;
        filesToBeClassified.AddLast(file);
    }
}
3 - Process the changes for each file.
As shown earlier in Execute(), we periodically do two things to actually process the files we keep putting in our "to be classified" list. 1) We run through the "to be classified" list to see if any of the files are older than our update threshold, which is the minimum period of time to wait before actually classifying and applying policy to the file we put in the list. This allows us to consolidate multiple watcher events into one classification call. If the list has any files older than that threshold we classify and apply policy to them. 2) Any file that was classified in the previous step is placed in a "classified files" list and removed from the "to be classified" list. Once a file has been in this list longer than our cache threshold, we remove the file from the list. Keeping track of recently classified files allows us to skip classifying a file in the case where it is updated, but its classification doesn't change. We don't want to set properties if we don't have to.
3a - Get the properties for the file.
Now, the first thing we do when a file crosses the update threshold is call IFsrmClassificationManager::EnumFileProperties to classify the file. The EnumFileProperties API can be run in two modes: run classification rules when determining the property value, or just look at previously stored values, either from a previous classification run or properties embedded in the file. For our purposes we want to run the classification rules so that properties that weren't there before will show up. Also, if you have rules that look at the content of the file you'll want to have those run in the event that phrases you are looking for are added to the file. We also save off all the property definitions that are defined at the time the file is classified so we know what property values came from FCI and can be set later on.
private void
ClassifyFile(
    FileItem file,
    FsrmClassificationManager manager
    )
{
    // Classify one file and cache the results on the FileItem so later steps
    // can compare and apply them without re-classifying.
    IFsrmCollection propDefinitions = null;

    // Enumerate all FCI classification properties of the file and save those
    // properties with the file so they can be compared against future values.
    // Per the accompanying text, this mode runs the classification rules rather
    // than reading only stored values -- confirm against the FSRM documentation.
    file.Properties = manager.EnumFileProperties(file.FullPath, _FsrmGetFilePropertyOptions.FsrmGetFilePropertyOptions_None);

    // Snapshot the property definitions that exist right now; only properties
    // with a matching definition are treated as FCI-owned when set back later.
    propDefinitions = manager.EnumPropertyDefinitions(_FsrmEnumOptions.FsrmEnumOptions_None);
    file.PropertyDefinitions = new Dictionary<string, IFsrmPropertyDefinition>();
    foreach (IFsrmPropertyDefinition propDef in propDefinitions)
    {
        file.PropertyDefinitions.Add(propDef.Name, propDef);
    }
    file.TimeStamp = DateTime.UtcNow;
}
private bool
IsFileInClassifiedCache(
    FileItem file
    )
{
    // Returns true when the file's freshly computed properties match what we
    // classified recently, meaning no SetFileProperty calls are needed.
    bool isAlreadyClassified = false;
    // FIX: TryGetValue avoids the ContainsKey + indexer double lookup.
    FileItem classifiedFile;
    if (classifiedFilesCache.TryGetValue(file.FullPath, out classifiedFile))
    {
        // Compare file properties.
        if (file.Compare(classifiedFile))
        {
            // Properties unchanged: refresh the cache entry's timestamp so it
            // survives pruning, and drop the pending work item.
            classifiedFile.TimeStamp = DateTime.UtcNow;
            isAlreadyClassified = true;
            //
            // Remove the file item from the files to be classified list.
            // Note that the node may have moved within the linked list.
            //
            RemoveFileFromToBeClassifiedList(file.FullPath);
        }
        else
        {
            // Properties changed: evict the stale cache entry so the file gets
            // classified again and its new properties applied.
            bool returnValue = classifiedFilesCache.Remove(classifiedFile.FullPath);
            classifiedFile = null;
        }
    }
    return isAlreadyClassified;
}
private void
SetFCIProperties(
    FileItem file,
    FsrmClassificationManager manager
    )
{
    // Write the cached FCI property values back onto the file.
    // FIX: the doubled quotes in the published listing are flattened escape
    // sequences; restored to \" so the string literals compile.
    // Skip setting properties on ReadOnly files.
    if ((File.GetAttributes(file.FullPath) & FileAttributes.ReadOnly) == FileAttributes.ReadOnly)
    {
        PrintMessage("File: \"" + file.FullPath +
            "\" is a read only file. Properties are not set on read only files.", ConsoleColor.Yellow);
        return;
    }
    foreach (IFsrmProperty property in file.Properties)
    {
        // Set this property on the file only if it is an FCI property, i.e. a
        // property definition existed when the file was classified.
        if (file.PropertyDefinitions.ContainsKey(property.Name))
        {
            PrintMessage("Setting property \"" + property.Name + "=" + property.Value +
                "\" on file: \"" + file.FullPath + "\"", ConsoleColor.Green);
            manager.SetFileProperty(file.FullPath, property.Name, property.Value);
        }
        else
        {
            PrintMessage("Found property \"" + property.Name + "=" + property.Value + "\" on file: \"" +
                file.FullPath + "\"", ConsoleColor.Yellow);
        }
    }
}
3d – Move the file from the “to be classified” list to the “recently classified” list.
Once the file’s properties have been successfully set we add the file to the “recently classified” list and remove it from the “to be classified list.” Not much to say on this point, but important to do.
3d - Apply the policy to the file.
Now that we have classified the file and optimized to only act on those files where classification has changed we can get around to applying any policy we might want to take, provided the file matches our criteria. We loop through the properties we saved off when we classified the file (no sense in calling GetFileProperty and classifying the file again) and see if the value is what we are looking for. This sample only does a simple string comparison, but there are many other comparison operations you may want to consider. For example, if you are using an ordered list property you may want to act on all files whose value has a greater order value than a particular value. When you know the file meets your criteria you can send it to the exe or script that applies the policy. In this example we send the file to an exe that will encrypt it. Now we don't have to worry about our sensitive information sitting around unprotected until the file management job runs.
private void
EvaluatePolicyConditionAndExecutePolicyAction(
    FileItem file
    )
{
    // Check whether the file's saved properties match the configured policy
    // condition and, if so, run the configured command against the file.
    // FIX: restored the \" escapes flattened to "" in publishing.
    bool foundProperty = false;
    // Get the specified property from the list of properties we already saved.
    // We don't want to call GetFileProperty (and re-classify the file) for this.
    foreach (IFsrmProperty property in file.Properties)
    {
        // Property names are identifiers; ordinal comparison is the right choice.
        if (property.Name.Equals(propertyCondition[0], StringComparison.OrdinalIgnoreCase))
        {
            foundProperty = true;
            PrintMessage("Found property \"" + property.Name + "=" + property.Value +
                "\" on file: \"" + file.FullPath + "\"", ConsoleColor.Green);
            // Check if the property value matches the value in the policy.
            if (property.Value == propertyCondition[1])
            {
                // Replace the [FILEPATH] token with the quoted full path of the
                // current file so paths with spaces survive the command line.
                string commandArgs = commandArguments.Value;
                if (commandArgs != null)
                {
                    commandArgs = commandArgs.Replace("[FILEPATH]", "\"" + file.FullPath + "\"");
                }
                // Execute the command in a separate process and wait for it.
                // FIX: the original hard-coded "encryptfiles.exe" (in curly
                // quotes) even though the log line reports command.Value; use
                // the configured command so the message and the action agree.
                PrintMessage("Executing command \"" + command.Value + "\" with arg \"" + commandArgs +
                    "\" on file: \"" + file.FullPath + "\"", ConsoleColor.Green);
                Process process = Process.Start(command.Value, commandArgs);
                process.WaitForExit();
                PrintMessage("Command return code:" + process.ExitCode, ConsoleColor.Green);
            }
            break;
        }
    }
    if (!foundProperty)
    {
        PrintMessage("Property \"" + propertyCondition[0] + "\" not found on file: \"" +
            file.FullPath + "\"", ConsoleColor.White);
    }
}
4 - Cleanup the “recently classified” list.
The only thing left to do is prune old entries from the “recently classified” list. Here we remove any files that have been in the list longer than our “cache window” timeout.
private void
PruneClassifiedFilesCache( )
{
    // Evict cache entries that have been in the "recently classified" list
    // longer than the configured cache window. Keys are collected first because
    // a Dictionary cannot be modified while it is being enumerated.
    TimeSpan ts = TimeSpan.FromSeconds(cacheWindow.Value);
    Queue<string> itemsToBeRemoved = new Queue<string>();
    foreach (KeyValuePair<string, FileItem> file in classifiedFilesCache)
    {
        // FIX: restored the closing parenthesis missing from the original.
        if (DateTime.Compare(file.Value.TimeStamp.Add(ts), DateTime.UtcNow) <= 0)
        {
            itemsToBeRemoved.Enqueue(file.Key);
        }
    }
    while (itemsToBeRemoved.Count > 0)
    {
        string key = itemsToBeRemoved.Dequeue();
        classifiedFilesCache.Remove(key);
    }
}
Summary
You might be tempted to just use near real time classification, let it call GetFileProperty and SetFileProperty, and always use it to apply policies to files - it's simple to implement and does the same thing as a file management job. However, you should remember to use the right tool for the job. Automatic classification and file management jobs are designed to efficiently scan whole namespaces where files already exist and are much more efficient than the GetFileProperty and SetFileProperty APIs. There's also the chance that the FileSystemWatcher can miss files if there are too many file changes. For this reason it is a good idea to have a file management job configured for the same policy and namespace as your near real-time scripts to catch any missed files. Each approach to applying policy has its pros and cons, but if you have a share that you need to protect and can't wait for a nightly task, near real time classification is the way to go.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.