// Copyright Epic Games, Inc. All Rights Reserved. using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using System.Xml; using System.Xml.Linq; using System.IO; using System.Diagnostics; using CSVStats; using System.Collections; using System.Collections.Concurrent; using System.Threading; using System.Security.Cryptography; using PerfSummaries; namespace PerfReportTool { class CachedCsvFile { enum FileType { TextCsv, BinaryCsv, SummaryTableCachePrc }; public CachedCsvFile(string inFilename, bool useCacheFiles, DerivedMetadataMappings inDerivedMetadataMappings, DerivedCsvStatDefinition[] inDerivedCsvStatDefinitions) { if (inFilename.ToLower().EndsWith(".csv")) { fileType = FileType.TextCsv; } else if (inFilename.ToLower().EndsWith(".csv.bin")) { fileType = FileType.BinaryCsv; } else if (inFilename.ToLower().EndsWith(".prc")) { fileType = FileType.SummaryTableCachePrc; } else { throw new Exception("File extension not supported for file " + inFilename); } string cacheFilename = inFilename + ".cache"; if (useCacheFiles && File.Exists(cacheFilename)) { string[] fileLines = File.ReadAllLines(cacheFilename); // Put the stats and metadata lines in the standard order if (fileLines.Length >= 3) { string metadataLine = fileLines[1]; string statsLine = fileLines[2]; fileLines[1] = statsLine; fileLines[2] = metadataLine; } dummyCsvStats = CsvStats.ReadCSVFromLines(fileLines, null, 0, true); } else { if (fileType == FileType.BinaryCsv) { dummyCsvStats = CsvStats.ReadBinFile(inFilename, null, 0, true); } else if (fileType == FileType.TextCsv) { textCsvLines = File.ReadAllLines(inFilename); if (textCsvLines.Length > 0) { dummyCsvStats = CsvStats.ReadCSVFromLines(textCsvLines, null, 0, true); } else { Console.WriteLine("CSV file " + inFilename + " contains no lines!"); dummyCsvStats = new CsvStats(); } } else if (fileType == FileType.SummaryTableCachePrc) { // Read just the Csv metadata so we can filter metadata = null; cachedSummaryTableRowData = SummaryTableRowData.TryReadFromCacheFile(inFilename, true); if (cachedSummaryTableRowData != null) { metadata = cachedSummaryTableRowData.ReadCsvMetadata(); } else { Console.WriteLine("Invalid PRC file detected: " + inFilename); } } } filename = inFilename; // Setup initial metadata variable mappings derived metadata (which can read from the initial variableMappings). // Note: we skip for PRCs (dummyCsvStats==null), since these already have mappings cached if (dummyCsvStats != null) { xmlVariableMappings = new XmlVariableMappings(); // Note: We could apply the global variable set here, but that is slow and leads to complexity and correctness issues due to dependency order if (dummyCsvStats.metaData != null) { metadata = dummyCsvStats.metaData; xmlVariableMappings.SetMetadataVariables(metadata); inDerivedMetadataMappings.ApplyMapping(metadata, xmlVariableMappings); } // Add the derived csv stats to dummyStats derivedCsvStatDefinitions = inDerivedCsvStatDefinitions; foreach (DerivedCsvStatDefinition derivedCsvStatDefinition in derivedCsvStatDefinitions) { dummyCsvStats.AddStat(new StatSamples(derivedCsvStatDefinition.name, 0, true)); } } } private void AddDerivedMetadataToCsv(CsvStats csvStats) { foreach (DerivedCsvStatDefinition derivedStat in derivedCsvStatDefinitions) { finalCsv.AddDerivedStatFromExpression(derivedStat.getExpressionStr()); } } public void PrepareCsvData() { if (fileType == FileType.BinaryCsv) { finalCsv = CsvStats.ReadBinFile(filename); AddDerivedMetadataToCsv(finalCsv); // If we already have metadata then use it. This avoids the need to remap it multiple times if (metadata != null) { finalCsv.metaData = metadata; } } else { if (textCsvLines == null) { textCsvLines = File.ReadAllLines(filename); } } } public CsvStats GetFinalCsv() { if (finalCsv != null) { return finalCsv; } if (fileType == FileType.TextCsv) { // TODO: Investigate moving this to the PrepareCsvData function so it can be done in parallel like with binary CSVs // Not sure why it's not like that currently - maybe mem usage was the issue, or it was faster for some reason finalCsv = CsvStats.ReadCSVFromLines(textCsvLines, null); AddDerivedMetadataToCsv(finalCsv); // If we already have metadata then use it. This avoids the need to remap it multiple times if ( metadata != null ) { finalCsv.metaData = metadata; } return finalCsv; } return null; } public bool DoesMetadataMatchQuery(QueryExpression metadataQuery) { if (metadataQuery == null) { return true; } if (metadata == null) { Console.WriteLine("CSV " + filename + " has no metadata"); return false; } return metadataQuery.Evaluate(metadata); } public void ComputeSummaryTableCacheId(string reportTypeId) { // If the CSV has an embedded ID, use that. Otherwise generate one. string csvId; if (metadata != null && metadata.Values.ContainsKey("csvid")) { csvId = metadata.Values["csvid"]; } else { // Fall back to absolute path if the CSVID metadata doesn't exist // Note that using the filename like this means moving the file will result in a new entry for each location StringBuilder sb = new StringBuilder(); sb.Append("CSVFILENAME={" + Path.GetFullPath(filename).ToLower() + "}\n"); if (metadata != null) { foreach (string key in metadata.Values.Keys) { sb.Append("{" + key + "}={" + metadata.Values[key] + "}\n"); } } csvId = HashHelper.StringToHashStr(sb.ToString()); } if (reportTypeId == null) { summaryTableCacheId = csvId; } else { summaryTableCacheId = csvId + "_" + reportTypeId; } } public string filename; public string summaryTableCacheId; public string[] textCsvLines; public CsvStats dummyCsvStats; public CsvStats finalCsv; public CsvMetadata metadata; public SummaryTableRowData cachedSummaryTableRowData; public ReportTypeInfo reportTypeInfo; public XmlVariableMappings xmlVariableMappings; public DerivedCsvStatDefinition[] derivedCsvStatDefinitions; FileType fileType; }; class ExternalMetadata : Dictionary { } class ExternalMetadataCache { public ExternalMetadataCache(List inExternalMetadataSourceStrings) { externalMetadataSourceStrings = inExternalMetadataSourceStrings; } public void InjectExternalMetadata(SummaryTableRowData summaryTableRowData) { foreach (string filename in externalMetadataSourceStrings) { string resolvedFilename = ResolveExternalMetadataFilename(filename, summaryTableRowData); if (resolvedFilename == null) { continue; } ExternalMetadata fileDict = LoadExternalMetadataFile(resolvedFilename); foreach (KeyValuePair entry in fileDict) { // Add metadata if it doesn't already exist SummaryTableElement element = summaryTableRowData.Get(entry.Key); if (element == null) { summaryTableRowData.Add(SummaryTableElement.Type.ExternalMetadata, entry.Key, entry.Value); } } } } ExternalMetadata LoadExternalMetadataFile(string filename) { lock (cachedExternalMetadataFiles) { if (cachedExternalMetadataFiles.TryGetValue(filename, out ExternalMetadata cachedDict)) { return cachedDict; } } ExternalMetadata dict = new ExternalMetadata(); if (System.IO.File.Exists(filename)) { string[] lines = System.IO.File.ReadAllLines(filename); foreach (string line in lines) { int idx = line.IndexOf('='); if (idx == -1) { break; } string key = line[..idx]; string value = line[(idx + 1)..]; dict[key] = value; } } // Add to the cache if it wasn't added while we were reading the file lock (cachedExternalMetadataFiles) { if (!cachedExternalMetadataFiles.ContainsKey(filename)) { cachedExternalMetadataFiles[filename] = dict; } } return dict; } // Converts an external metadata file path (including {} metadata entries) to an actual filename by resolving the metadata entries // Returns null on failure static string ResolveExternalMetadataFilename(string sourceFilename, SummaryTableRowData summaryTableRowData) { string filenameOut = ""; // Replace {meta} entries with actual metadata int readIndex = 0; int startIndex = sourceFilename.IndexOf('{'); while (startIndex >= 0) { int endIndex = sourceFilename.IndexOf('}', startIndex); if (endIndex == -1) { throw new Exception("Warning: bad external metadata filename (mismatched {): " + sourceFilename); } string key = sourceFilename[(startIndex+1)..endIndex].ToLowerInvariant(); SummaryTableElement sourceElement = summaryTableRowData.Get(key); if (sourceElement == null || sourceElement.type != SummaryTableElement.Type.CsvMetadata) { // If we fail to find the metadata, that's probably fine. Just return null. Ignore other types return null; } string value = sourceElement.value; filenameOut += sourceFilename[readIndex..startIndex] + value; readIndex = endIndex+1; startIndex = sourceFilename.IndexOf('{', endIndex); } filenameOut += sourceFilename[readIndex..]; return filenameOut; } Dictionary cachedExternalMetadataFiles = new Dictionary(); List externalMetadataSourceStrings; }; class CsvFileCache { public CsvFileCache( string[] inCsvFilenames, int inLookaheadCount, int inThreadCount, bool inUseCacheFiles, QueryExpression inMetadataQuery, ReportXML inReportXml, ReportTypeParams inReportTypeParams, bool inBulkMode, bool inSummaryTableCacheOnlyMode, bool inSummaryTableCacheUseOnlyCsvID, bool inRemoveDuplicates, bool inRequireMetadata, string inSummaryTableCacheDir, bool inListFilesMode, ExternalMetadataCache inExternalMetadataCache) { csvFileInfos = new CsvFileInfo[inCsvFilenames.Length]; for (int i = 0; i < inCsvFilenames.Length; i++) { csvFileInfos[i] = new CsvFileInfo(inCsvFilenames[i]); } fileCache = this; writeIndex = 0; useCacheFiles = inUseCacheFiles; readIndex = 0; lookaheadCount = inLookaheadCount; countFreedSinceLastGC = 0; reportXml = inReportXml; bulkMode = inBulkMode; summaryTableCacheOnlyMode = inSummaryTableCacheOnlyMode; summaryTableCacheUseOnlyCsvID = inSummaryTableCacheUseOnlyCsvID; metadataQuery = inMetadataQuery; derivedMetadataMappings = inReportXml.derivedMetadataMappings; summaryTableCacheDir = inSummaryTableCacheDir; reportTypeParams = inReportTypeParams; bRemoveDuplicates = inRemoveDuplicates; bRequireMetadata = inRequireMetadata; bListFilesMode = inListFilesMode; derivedCsvStatDefinitions = reportXml.derivedCsvStatDefinitions.Values.ToArray(); csvIdToFilename = new Dictionary(); outCsvQueue = new BlockingCollection(lookaheadCount); externalMetadataCache = inExternalMetadataCache; // Kick off the workers (must be done last) if (inLookaheadCount > 0) { Console.WriteLine("Kicking off " + inThreadCount + " precache threads with lookahead " + lookaheadCount); precacheThreads = new Thread[inThreadCount]; precacheJobs = new ThreadStart[inThreadCount]; for (int i = 0; i < precacheThreads.Length; i++) { precacheJobs[i] = new ThreadStart(PrecacheThreadRun); precacheThreads[i] = new Thread(precacheJobs[i]); precacheThreads[i].Start(); } } } public CachedCsvFile GetNextCachedCsvFile() { CachedCsvFile file = null; if (readIndex >= csvFileInfos.Length) { // We're done return null; } // Find the next valid fileinfo if (precacheThreads == null) { CsvFileInfo fileInfo = csvFileInfos[readIndex]; file = new CachedCsvFile(fileInfo.filename, useCacheFiles, derivedMetadataMappings, derivedCsvStatDefinitions); if (!file.DoesMetadataMatchQuery(metadataQuery)) // TODO do we need to check this here and in the thread? { return null; } } else { CsvFileInfo fileInfo = outCsvQueue.Take(); file = fileInfo.cachedFile; fileInfo.cachedFile = null; countFreedSinceLastGC++; // Periodically GC if (countFreedSinceLastGC > 16 && summaryTableCacheOnlyMode == false) { GC.Collect(); GC.WaitForPendingFinalizers(); countFreedSinceLastGC = 0; } if (!fileInfo.isValid) { file = null; } } readIndex++; return file; } static void PrecacheThreadRun() { fileCache.ThreadRun(); } void ThreadRun() { int threadWriteIndex = 0; while (true) { threadWriteIndex = Interlocked.Increment(ref writeIndex) - 1; if (threadWriteIndex >= csvFileInfos.Length) { // We're done break; } CsvFileInfo fileInfo = csvFileInfos[threadWriteIndex]; // Process the file CachedCsvFile file = new CachedCsvFile(fileInfo.filename, useCacheFiles, derivedMetadataMappings, derivedCsvStatDefinitions); bool bProcessThisFile = true; if (bRequireMetadata && file.metadata == null) { Console.WriteLine("CSV has no metadata. Skipping: " + fileInfo.filename); bProcessThisFile = false; } else if (bRemoveDuplicates && file.metadata != null && file.metadata.Values.ContainsKey("csvid")) { string csvId = file.metadata.Values["csvid"]; lock (csvIdToFilename) { if (csvIdToFilename.ContainsKey(csvId)) { Console.WriteLine("Duplicate CSV found: " + fileInfo.filename); Console.WriteLine(" First version : " + csvIdToFilename[csvId]); bProcessThisFile = false; duplicateCount++; } else { csvIdToFilename[csvId] = fileInfo.filename; } } } if (bProcessThisFile && file.DoesMetadataMatchQuery(metadataQuery)) { if (bListFilesMode) { Console.WriteLine("File: " + fileInfo.filename); } else if (summaryTableCacheOnlyMode) { // Assume the report type is valid if we have cached metadata, since we don't actually need the reporttype in metadata only mode if (file.cachedSummaryTableRowData != null) { // Just read the full cached metadata file.cachedSummaryTableRowData = SummaryTableRowData.TryReadFromCacheFile(fileInfo.filename); fileInfo.isValid = true; } } else { file.reportTypeInfo = GetCsvReportTypeInfo(file, bulkMode); if (file.reportTypeInfo != null) { string reportTypeHash = summaryTableCacheUseOnlyCsvID ? null : file.reportTypeInfo.GetSummaryTableCacheID(); file.ComputeSummaryTableCacheId(reportTypeHash); if (summaryTableCacheDir != null) { // If a summary metadata cache is specified, try reading from it instead of reading the whole CSV // Note that this will be disabled if we're not in bulk mode file.cachedSummaryTableRowData = SummaryTableRowData.TryReadFromCache(summaryTableCacheDir, file.summaryTableCacheId); if (file.cachedSummaryTableRowData == null) { Console.WriteLine("Failed to read summary metadata from cache for CSV: " + fileInfo.filename); } } if (file.cachedSummaryTableRowData == null) { file.PrepareCsvData(); } // Only read the full file data if the metadata matches fileInfo.isValid = true; } } // Inject external metadata into the cached row data if (file.cachedSummaryTableRowData != null) { externalMetadataCache.InjectExternalMetadata(file.cachedSummaryTableRowData); } } fileInfo.cachedFile = file; // TODO: only add valid files to the queue? Need alternative stopping condition outCsvQueue.Add(fileInfo); } } ReportTypeInfo GetCsvReportTypeInfo(CachedCsvFile csvFile, bool bBulkMode) { try { return reportXml.GetReportTypeInfo(reportTypeParams.reportTypeOverride, csvFile, bBulkMode, reportTypeParams.forceReportType); } catch (Exception e) { if (bBulkMode) { Console.Error.WriteLine("[ERROR] : " + e.Message); return null; } else { // If we're not in bulk mode, exceptions are fatal throw; } } } ThreadStart[] precacheJobs; Thread[] precacheThreads; int writeIndex; int readIndex; int lookaheadCount; int countFreedSinceLastGC; bool useCacheFiles; bool bulkMode; bool bRequireMetadata; bool summaryTableCacheOnlyMode; bool summaryTableCacheUseOnlyCsvID; QueryExpression metadataQuery; string summaryTableCacheDir; ReportXML reportXml; ReportTypeParams reportTypeParams; DerivedMetadataMappings derivedMetadataMappings; DerivedCsvStatDefinition[] derivedCsvStatDefinitions; Dictionary csvIdToFilename; public int duplicateCount = 0; bool bRemoveDuplicates; bool bListFilesMode; static CsvFileCache fileCache; BlockingCollection outCsvQueue; class CsvFileInfo { public CsvFileInfo(string inFilename) { filename = inFilename; cachedFile = null; isValid = false; } public CachedCsvFile cachedFile; public string filename; public bool isValid; } CsvFileInfo[] csvFileInfos; ExternalMetadataCache externalMetadataCache; } }