Skip to content

Commit 6a0a877

Browse files
committed
Move MRDP folder to root
1 parent ad85347 commit 6a0a877

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+6744
-6744
lines changed
Lines changed: 65 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,65 @@
1-
package mrdp;
2-
3-
import java.util.Arrays;
4-
5-
import mrdp.ch1.*;
6-
import mrdp.ch2.*;
7-
import mrdp.ch3.*;
8-
import mrdp.ch4.*;
9-
import mrdp.ch5.*;
10-
import mrdp.ch6.*;
11-
import mrdp.ch7.*;
12-
import mrdp.utils.MRDPUtils;
13-
14-
import org.apache.hadoop.conf.Configuration;
15-
import org.apache.hadoop.conf.Configured;
16-
import org.apache.hadoop.util.Tool;
17-
import org.apache.hadoop.util.ToolRunner;
18-
19-
@SuppressWarnings("unused")
20-
public class MRDPMain extends Configured implements Tool {
21-
22-
public static void main(String[] args) throws Exception {
23-
System.exit(ToolRunner.run(new Configuration(), new MRDPMain(), args));
24-
}
25-
26-
@Override
27-
public int run(String[] args) throws Exception {
28-
if (args.length > 0) {
29-
String example = args[0];
30-
String[] otherArgs = Arrays.copyOfRange(args, 1, args.length);
31-
32-
if (example.equalsIgnoreCase("PartitionPruningOutput")) {
33-
PartitionPruningOutputDriver.main(otherArgs);
34-
} else if (example.equalsIgnoreCase("PartitionPruningInput")) {
35-
PartitionPruningInputDriver.main(otherArgs);
36-
} else if (example.equalsIgnoreCase("RedisInput")) {
37-
RedisInputDriver.main(otherArgs);
38-
} else if (example.equalsIgnoreCase("RedisOutput")) {
39-
RedisOutputDriver.main(otherArgs);
40-
} else {
41-
printHelp();
42-
return 1;
43-
}
44-
45-
return 0;
46-
} else {
47-
printHelp();
48-
return 1;
49-
}
50-
}
51-
52-
private void printHelp() {
53-
System.out
54-
.println("Usage: hadoop jar mrdp.jar <example> <example args>");
55-
System.out.println("Examples are:");
56-
System.out.println("Chapter 7:");
57-
System.out
58-
.println("\tRedisOutput <user data> <redis hosts> <hashset name>");
59-
System.out
60-
.println("\tRedisInput <redis hosts> <hashset name> <output>");
61-
System.out.println("\tPartitionPruningOutput <user data>");
62-
System.out
63-
.println("\tPartitionPruningInput <last access months> <output>");
64-
}
65-
}
1+
package mrdp;
2+
3+
import java.util.Arrays;
4+
5+
import mrdp.ch1.*;
6+
import mrdp.ch2.*;
7+
import mrdp.ch3.*;
8+
import mrdp.ch4.*;
9+
import mrdp.ch5.*;
10+
import mrdp.ch6.*;
11+
import mrdp.ch7.*;
12+
import mrdp.utils.MRDPUtils;
13+
14+
import org.apache.hadoop.conf.Configuration;
15+
import org.apache.hadoop.conf.Configured;
16+
import org.apache.hadoop.util.Tool;
17+
import org.apache.hadoop.util.ToolRunner;
18+
19+
@SuppressWarnings("unused")
20+
public class MRDPMain extends Configured implements Tool {
21+
22+
public static void main(String[] args) throws Exception {
23+
System.exit(ToolRunner.run(new Configuration(), new MRDPMain(), args));
24+
}
25+
26+
@Override
27+
public int run(String[] args) throws Exception {
28+
if (args.length > 0) {
29+
String example = args[0];
30+
String[] otherArgs = Arrays.copyOfRange(args, 1, args.length);
31+
32+
if (example.equalsIgnoreCase("PartitionPruningOutput")) {
33+
PartitionPruningOutputDriver.main(otherArgs);
34+
} else if (example.equalsIgnoreCase("PartitionPruningInput")) {
35+
PartitionPruningInputDriver.main(otherArgs);
36+
} else if (example.equalsIgnoreCase("RedisInput")) {
37+
RedisInputDriver.main(otherArgs);
38+
} else if (example.equalsIgnoreCase("RedisOutput")) {
39+
RedisOutputDriver.main(otherArgs);
40+
} else {
41+
printHelp();
42+
return 1;
43+
}
44+
45+
return 0;
46+
} else {
47+
printHelp();
48+
return 1;
49+
}
50+
}
51+
52+
private void printHelp() {
53+
System.out
54+
.println("Usage: hadoop jar mrdp.jar <example> <example args>");
55+
System.out.println("Examples are:");
56+
System.out.println("Chapter 7:");
57+
System.out
58+
.println("\tRedisOutput <user data> <redis hosts> <hashset name>");
59+
System.out
60+
.println("\tRedisInput <redis hosts> <hashset name> <output>");
61+
System.out.println("\tPartitionPruningOutput <user data>");
62+
System.out
63+
.println("\tPartitionPruningInput <last access months> <output>");
64+
}
65+
}
Lines changed: 97 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,97 @@
1-
package mrdp.appendixA;
2-
3-
import java.io.BufferedReader;
4-
import java.io.InputStreamReader;
5-
import java.util.zip.GZIPInputStream;
6-
7-
import org.apache.hadoop.conf.Configuration;
8-
import org.apache.hadoop.fs.FSDataOutputStream;
9-
import org.apache.hadoop.fs.FileStatus;
10-
import org.apache.hadoop.fs.FileSystem;
11-
import org.apache.hadoop.fs.Path;
12-
import org.apache.hadoop.util.GenericOptionsParser;
13-
import org.apache.hadoop.util.bloom.BloomFilter;
14-
import org.apache.hadoop.util.bloom.Key;
15-
import org.apache.hadoop.util.hash.Hash;
16-
17-
public class BloomFilterDriver {
18-
19-
public static void main(String[] args) throws Exception {
20-
Configuration conf = new Configuration();
21-
String[] otherArgs = new GenericOptionsParser(conf, args)
22-
.getRemainingArgs();
23-
if (otherArgs.length != 4) {
24-
System.err
25-
.println("Usage: BloomFilterWriter <inputfile> <nummembers> <falseposrate> <bfoutfile>");
26-
System.exit(1);
27-
}
28-
29-
FileSystem fs = FileSystem.get(new Configuration());
30-
31-
// Parse command line arguments
32-
Path inputFile = new Path(otherArgs[0]);
33-
int numMembers = Integer.parseInt(otherArgs[1]);
34-
float falsePosRate = Float.parseFloat(otherArgs[2]);
35-
Path bfFile = new Path(otherArgs[3]);
36-
37-
// Calculate our vector size and optimal K value based on approximations
38-
int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
39-
int nbHash = getOptimalK(numMembers, vectorSize);
40-
41-
// create new Bloom filter
42-
BloomFilter filter = new BloomFilter(vectorSize, nbHash,
43-
Hash.MURMUR_HASH);
44-
45-
// Open file for read
46-
47-
System.out.println("Training Bloom filter of size " + vectorSize
48-
+ " with " + nbHash + " hash functions, " + numMembers
49-
+ " approximate number of records, and " + falsePosRate
50-
+ " false positive rate");
51-
52-
String line = null;
53-
int numRecords = 0;
54-
for (FileStatus status : fs.listStatus(inputFile)) {
55-
BufferedReader rdr;
56-
// if file is gzipped, wrap it in a GZIPInputStream
57-
if (status.getPath().getName().endsWith(".gz")) {
58-
rdr = new BufferedReader(new InputStreamReader(
59-
new GZIPInputStream(fs.open(status.getPath()))));
60-
} else {
61-
rdr = new BufferedReader(new InputStreamReader(fs.open(status
62-
.getPath())));
63-
}
64-
65-
System.out.println("Reading " + status.getPath());
66-
while ((line = rdr.readLine()) != null) {
67-
filter.add(new Key(line.getBytes()));
68-
++numRecords;
69-
}
70-
71-
rdr.close();
72-
}
73-
74-
System.out.println("Trained Bloom filter with " + numRecords
75-
+ " entries.");
76-
77-
System.out.println("Serializing Bloom filter to HDFS at " + bfFile);
78-
FSDataOutputStream strm = fs.create(bfFile);
79-
filter.write(strm);
80-
81-
strm.flush();
82-
strm.close();
83-
84-
System.out.println("Done training Bloom filter.");
85-
}
86-
87-
public static int getOptimalBloomFilterSize(int numRecords,
88-
float falsePosRate) {
89-
int size = (int) (-numRecords * (float) Math.log(falsePosRate) / Math
90-
.pow(Math.log(2), 2));
91-
return size;
92-
}
93-
94-
public static int getOptimalK(float numMembers, float vectorSize) {
95-
return (int) Math.round(vectorSize / numMembers * Math.log(2));
96-
}
97-
}
1+
package mrdp.appendixA;
2+
3+
import java.io.BufferedReader;
4+
import java.io.InputStreamReader;
5+
import java.util.zip.GZIPInputStream;
6+
7+
import org.apache.hadoop.conf.Configuration;
8+
import org.apache.hadoop.fs.FSDataOutputStream;
9+
import org.apache.hadoop.fs.FileStatus;
10+
import org.apache.hadoop.fs.FileSystem;
11+
import org.apache.hadoop.fs.Path;
12+
import org.apache.hadoop.util.GenericOptionsParser;
13+
import org.apache.hadoop.util.bloom.BloomFilter;
14+
import org.apache.hadoop.util.bloom.Key;
15+
import org.apache.hadoop.util.hash.Hash;
16+
17+
public class BloomFilterDriver {
18+
19+
public static void main(String[] args) throws Exception {
20+
Configuration conf = new Configuration();
21+
String[] otherArgs = new GenericOptionsParser(conf, args)
22+
.getRemainingArgs();
23+
if (otherArgs.length != 4) {
24+
System.err
25+
.println("Usage: BloomFilterWriter <inputfile> <nummembers> <falseposrate> <bfoutfile>");
26+
System.exit(1);
27+
}
28+
29+
FileSystem fs = FileSystem.get(new Configuration());
30+
31+
// Parse command line arguments
32+
Path inputFile = new Path(otherArgs[0]);
33+
int numMembers = Integer.parseInt(otherArgs[1]);
34+
float falsePosRate = Float.parseFloat(otherArgs[2]);
35+
Path bfFile = new Path(otherArgs[3]);
36+
37+
// Calculate our vector size and optimal K value based on approximations
38+
int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
39+
int nbHash = getOptimalK(numMembers, vectorSize);
40+
41+
// create new Bloom filter
42+
BloomFilter filter = new BloomFilter(vectorSize, nbHash,
43+
Hash.MURMUR_HASH);
44+
45+
// Open file for read
46+
47+
System.out.println("Training Bloom filter of size " + vectorSize
48+
+ " with " + nbHash + " hash functions, " + numMembers
49+
+ " approximate number of records, and " + falsePosRate
50+
+ " false positive rate");
51+
52+
String line = null;
53+
int numRecords = 0;
54+
for (FileStatus status : fs.listStatus(inputFile)) {
55+
BufferedReader rdr;
56+
// if file is gzipped, wrap it in a GZIPInputStream
57+
if (status.getPath().getName().endsWith(".gz")) {
58+
rdr = new BufferedReader(new InputStreamReader(
59+
new GZIPInputStream(fs.open(status.getPath()))));
60+
} else {
61+
rdr = new BufferedReader(new InputStreamReader(fs.open(status
62+
.getPath())));
63+
}
64+
65+
System.out.println("Reading " + status.getPath());
66+
while ((line = rdr.readLine()) != null) {
67+
filter.add(new Key(line.getBytes()));
68+
++numRecords;
69+
}
70+
71+
rdr.close();
72+
}
73+
74+
System.out.println("Trained Bloom filter with " + numRecords
75+
+ " entries.");
76+
77+
System.out.println("Serializing Bloom filter to HDFS at " + bfFile);
78+
FSDataOutputStream strm = fs.create(bfFile);
79+
filter.write(strm);
80+
81+
strm.flush();
82+
strm.close();
83+
84+
System.out.println("Done training Bloom filter.");
85+
}
86+
87+
public static int getOptimalBloomFilterSize(int numRecords,
88+
float falsePosRate) {
89+
int size = (int) (-numRecords * (float) Math.log(falsePosRate) / Math
90+
.pow(Math.log(2), 2));
91+
return size;
92+
}
93+
94+
public static int getOptimalK(float numMembers, float vectorSize) {
95+
return (int) Math.round(vectorSize / numMembers * Math.log(2));
96+
}
97+
}

0 commit comments

Comments
 (0)