Skip to content
Snippets Groups Projects
Commit 08f64092 authored by Hiro's avatar Hiro :surfer:
Browse files

Introduce MapDB to handle indexer

parent b2172c72
No related branches found
No related tags found
1 merge request: !9 Optimize function that is causing OOM errors
Pipeline #218249 failed
......@@ -21,11 +21,18 @@
<include name="jackson-annotations-2.8.6.jar"/>
<include name="jackson-core-2.8.6.jar"/>
<include name="jackson-databind-2.8.6.jar"/>
<include name="mapdb-3.1.0.jar" />
<include name="elsa-3.0.0-M5.jar" />
<include name="lz4-1.3.0.jar" />
<include name="xz-1.6.jar"/>
<include name="metrics-lib-${metricslibversion}-thin.jar"/>
<include name="logback-core-1.2.3.jar" />
<include name="logback-classic-1.2.3.jar" />
<include name="slf4j-api-1.7.22.jar" />
<include name="kotlin-stdlib-1.9.25.jar" />
<include name="eclipse-collections-10.4.0.jar" />
<include name="eclipse-collections-api-10.4.0.jar" />
<include name="guava-31.1-jre.jar" />
</patternset>
<target name="coverage-check">
......
File added
/* Copyright 2019--2020 The Tor Project
/* Copyright 2019--2024 The Tor Project
* See LICENSE for licensing information */
package org.torproject.metrics.collector.indexer;
......@@ -7,6 +7,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import java.io.Serializable;
import java.time.Instant;
import java.util.SortedSet;
import java.util.TreeSet;
......@@ -17,9 +18,12 @@ import java.util.concurrent.Future;
* like whether a descriptor file is currently being indexed or whether its
* link in {@code htdocs/} is marked for deletion.
*/
@JsonPropertyOrder({ "path", "size", "last_modified", "types",
"first_published", "last_published", "sha256" })
class FileNode {
@JsonPropertyOrder({"path", "size", "last_modified", "types",
"first_published", "last_published", "sha256"})
class FileNode implements Serializable {
// Pin the serialVersionUID explicitly so that compatible changes to this
// class do not invalidate previously serialized instances.
private static final long serialVersionUID = 1L;
/**
* Relative path of the file.
......@@ -70,22 +74,22 @@ class FileNode {
/**
* Indexer result that will be available as soon as the indexer has completed
* its task.
*
* <p>This field is marked as {@code transient} to prevent serialization.</p>
*/
@JsonIgnore
Future<FileNode> indexerResult;
transient Future<FileNode> indexerResult;
/**
* Timestamp when this file was first not found anymore in {@code indexed/},
* used to keep the link in {@code htdocs/} around for another 2 hours before
* deleting it, too.
*
* <p>This field is ignored when writing {@code index.json}, because it's an
* internal detail that nobody else cares about. The effect is that links
* might be around for longer than 2 hours in case of a restart, which seems
* acceptable.</p>
* <p>This field is marked as {@code transient} to prevent serialization
* because it's an internal detail that nobody else cares about.</p>
*/
@JsonIgnore
Instant markedForDeletion;
transient Instant markedForDeletion;
/**
* Create and return a {@link FileNode} instance with the given values.
......@@ -106,8 +110,8 @@ class FileNode {
* @return {@link FileNode} instance with the given values.
*/
static FileNode of(String path, Long size, String lastModified,
Iterable<String> types, String firstPublished, String lastPublished,
String sha256) {
Iterable<String> types, String firstPublished, String lastPublished,
String sha256) {
FileNode fileNode = new FileNode();
fileNode.path = path;
fileNode.size = size;
......@@ -121,5 +125,4 @@ class FileNode {
fileNode.sha256 = sha256;
return fileNode;
}
}
}
\ No newline at end of file
......@@ -401,31 +401,54 @@ public class CreateIndexJsonTest {
*/
@Test
public void testExistingRecentExitList() {
  // Arrange: a recent exit list file that index.json already knows about.
  final Instant lastModified = Instant.parse("2016-09-20T13:02:00Z");
  createFile(recentExitListFilePath, lastModified);
  writeIndexJson(recentExitListIndexJsonString);

  // Remember whether the link in htdocs/ is present before the run.
  final boolean linkPresentBefore = Files.exists(recentExitListLinkPath);

  // Act: run the first execution.
  startProcessing(firstExecution);

  // Assert: indexerTasks must be empty after the run ...
  assertTrue(this.indexerTasks.isEmpty());

  // ... and the presence of the link must be the same as before the run.
  final boolean linkPresentAfter = Files.exists(recentExitListLinkPath);
  assertEquals(linkPresentBefore, linkPresentAfter);
}
/**
 * Test whether a deleted exit list in {@code indexed/recent/} is first
 * removed from {@code index.json} and later deleted from
 * {@code htdocs/recent/}.
 *
 * <p>Each expectation is asserted exactly once, with a failure message that
 * names the phase in which it was violated.</p>
 */
@Test
public void testDeletedRecentExitList() {
  // Phase 1: create the recent exit list file, write an index.json that
  // already lists it, and run the first execution. The link in htdocs/ is
  // expected to exist afterwards.
  createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
  writeIndexJson(recentExitListIndexJsonString);
  startProcessing(firstExecution);
  assertTrue("The link for the recent exit list should exist after the first "
      + "processing.", fileExists(recentExitListLinkPath));

  // Phase 2: delete the file and run the second execution. The entry must be
  // gone from index.json, but the link must not be deleted right away.
  deleteFile(recentExitListFilePath);
  startProcessing(secondExecution);
  assertEquals("The index should be empty after the file is deleted.",
      emptyIndexJsonString, readIndexJson());
  assertTrue("The link should still exist after the second processing, "
      + "waiting for the grace period.", fileExists(recentExitListLinkPath));

  // Phase 3: run the third execution, after which the link must be gone.
  // NOTE(review): presumably thirdExecution lies past the grace period;
  // confirm against the definition of thirdExecution.
  startProcessing(thirdExecution);
  assertFalse("The link for the deleted file should be removed after the "
      + "grace period.", fileExists(recentExitListLinkPath));
}
/**
* Test whether a link in {@code htdocs/recent/} for which no corresponding
* file in {@code indexed/recent/} exists is eventually deleted.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment