Skip to content
This repository was archived by the owner on Apr 9, 2025. It is now read-only.

Commit ee552ba

Browse files
Mischn and bermeitinger-b
authored and committed
8.0.0 (#7)
* Generalized SharedNPPreParticipalExtractor. * Changed order of discourse extraction rules. * Fix in SignalPhraseClassifier. * Included sentence simplification. Refactored and restructured project. * 8.0.0-SNAPSHOT. Leaf-classes contain Tree-instance instead of String. * Bugfixes: ignore-case in classifier. * Updated rule-implementations and used relations. Used external resources for signal phrases and attribution verbs. * Licence fixes. * Updated README. * Release version 5.0.0
1 parent 05ef516 commit ee552ba

File tree

81 files changed

+3502
-1910
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+3502
-1910
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,18 @@ Clone and install locally
2020
mvn package
2121

2222
### Run the program
23+
Create a new text file with the input
24+
25+
vim input.txt
26+
27+
Run program
2328

2429
mvn clean compile exec:java
30+
31+
Inspect output
32+
33+
cat output_default.txt
34+
cat output_flat.txt
2535

2636
## Use as library
2737
Check `App.java`.

pom.xml

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
<groupId>org.lambda3.text.simplification</groupId>
2828
<artifactId>discourse-simplification</artifactId>
29-
<version>7.1.0</version>
29+
<version>8.0.0</version>
3030
<packaging>jar</packaging>
3131

3232
<name>Discourse Simplification</name>
@@ -41,6 +41,9 @@
4141

4242
<properties>
4343
<config.version>1.3.1</config.version>
44+
<jackson.version>2.8.8</jackson.version>
45+
46+
<simplification.version>5.0.0</simplification.version>
4447

4548
<corenlp.version>3.7.0</corenlp.version>
4649
<logback.version>1.1.8</logback.version>
@@ -62,23 +65,35 @@
6265
<version>${config.version}</version>
6366
</dependency>
6467

65-
<!-- Stanford NLP -->
68+
<!-- Jackson -->
6669
<dependency>
67-
<groupId>edu.stanford.nlp</groupId>
68-
<artifactId>stanford-corenlp</artifactId>
69-
<version>${corenlp.version}</version>
70-
<exclusions>
71-
<exclusion>
72-
<artifactId>slf4j-api</artifactId>
73-
<groupId>org.slf4j</groupId>
74-
</exclusion>
75-
</exclusions>
70+
<groupId>com.fasterxml.jackson.core</groupId>
71+
<artifactId>jackson-annotations</artifactId>
72+
<version>${jackson.version}</version>
73+
</dependency>
74+
<dependency>
75+
<groupId>com.fasterxml.jackson.core</groupId>
76+
<artifactId>jackson-core</artifactId>
77+
<version>${jackson.version}</version>
78+
</dependency>
79+
<dependency>
80+
<groupId>com.fasterxml.jackson.core</groupId>
81+
<artifactId>jackson-databind</artifactId>
82+
<version>${jackson.version}</version>
7683
</dependency>
84+
85+
<!-- Sentence Simplification -->
86+
<dependency>
87+
<groupId>org.lambda3.text.simplification</groupId>
88+
<artifactId>sentence-simplification</artifactId>
89+
<version>${simplification.version}</version>
90+
</dependency>
91+
92+
<!-- Stanford NLP -->
7793
<dependency>
7894
<groupId>edu.stanford.nlp</groupId>
7995
<artifactId>stanford-corenlp</artifactId>
8096
<version>${corenlp.version}</version>
81-
<classifier>models</classifier>
8297
<exclusions>
8398
<exclusion>
8499
<artifactId>slf4j-api</artifactId>

src/main/java/org/lambda3/text/simplification/discourse/App.java

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,40 @@
2222

2323
package org.lambda3.text.simplification.discourse;
2424

25-
import org.lambda3.text.simplification.discourse.processing.OutSentence;
26-
import org.lambda3.text.simplification.discourse.processing.Processor;
25+
import org.lambda3.text.simplification.discourse.processing.DiscourseSimplifier;
26+
import org.lambda3.text.simplification.discourse.processing.ProcessingType;
27+
import org.lambda3.text.simplification.discourse.model.SimplificationContent;
2728
import org.slf4j.LoggerFactory;
2829

30+
import java.io.BufferedWriter;
2931
import java.io.File;
32+
import java.io.FileWriter;
3033
import java.io.IOException;
34+
import java.util.Arrays;
3135
import java.util.List;
36+
import java.util.stream.Collectors;
3237

3338
public class App {
3439
private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(App.class);
35-
private static final Processor PROCESSOR = new Processor();
40+
private static final DiscourseSimplifier DISCOURSE_SIMPLIFIER = new DiscourseSimplifier();
41+
42+
private static void saveLines(File file, List<String> lines) {
43+
try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
44+
bw.write(lines.stream().collect(Collectors.joining("\n")));
45+
46+
// no need to close it.
47+
//bw.close()
48+
} catch (IOException e) {
49+
e.printStackTrace();
50+
}
51+
}
3652

3753
public static void main(String[] args) throws IOException {
3854

39-
List<OutSentence> sentences = PROCESSOR.process(new File("input.txt"), Processor.ProcessingType.WHOLE);
40-
// List<OutSentence> sentences = PROCESSOR.process("The text.", Processor.ProcessingType.WHOLE);
55+
SimplificationContent content = DISCOURSE_SIMPLIFIER.doDiscourseSimplification(new File("input.txt"), ProcessingType.WHOLE);
56+
content.serializeToJSON(new File("output.json"));
57+
saveLines(new File("output_default.txt"), Arrays.asList(content.defaultFormat(false)));
58+
saveLines(new File("output_flat.txt"), Arrays.asList(content.flatFormat(false)));
59+
LOGGER.info("done");
4160
}
4261
}

src/main/java/org/lambda3/text/simplification/discourse/Test.java

Lines changed: 0 additions & 66 deletions
This file was deleted.
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : Content
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.model;
24+
25+
import com.fasterxml.jackson.annotation.JsonAutoDetect;
26+
import com.fasterxml.jackson.annotation.PropertyAccessor;
27+
import com.fasterxml.jackson.core.JsonProcessingException;
28+
import com.fasterxml.jackson.databind.ObjectMapper;
29+
import com.fasterxml.jackson.databind.module.SimpleModule;
30+
import edu.stanford.nlp.trees.Tree;
31+
import org.lambda3.text.simplification.discourse.model.serializer.TreeDeserializer;
32+
import org.lambda3.text.simplification.discourse.model.serializer.TreeSerializer;
33+
34+
import java.io.File;
35+
import java.io.IOException;
36+
37+
public abstract class Content {
38+
private static final ObjectMapper MAPPER = new ObjectMapper();
39+
private static final SimpleModule MODULE = new SimpleModule();
40+
41+
static {
42+
MAPPER.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE);
43+
MAPPER.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
44+
45+
// register custom de-/serializers
46+
MODULE.addSerializer(Tree.class, new TreeSerializer());
47+
MODULE.addDeserializer(Tree.class, new TreeDeserializer());
48+
49+
MAPPER.registerModule(MODULE);
50+
}
51+
52+
public static <T extends Content> T deserializeFromJSON(String json, Class<T> clazz) throws IOException {
53+
return MAPPER.readValue(json, clazz);
54+
}
55+
56+
public static <T extends Content> T deserializeFromJSON(File file, Class<T> clazz) throws IOException {
57+
return MAPPER.readValue(file, clazz);
58+
}
59+
60+
public String prettyPrintJSON() throws JsonProcessingException {
61+
return MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this);
62+
}
63+
64+
public String serializeToJSON() throws JsonProcessingException {
65+
return MAPPER.writeValueAsString(this);
66+
}
67+
68+
public void serializeToJSON(File file) throws IOException {
69+
MAPPER.writeValue(file, this);
70+
}
71+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* ==========================License-Start=============================
3+
* DiscourseSimplification : Element
4+
*
5+
* Copyright © 2017 Lambda³
6+
*
7+
* GNU General Public License 3
8+
* This program is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* (at your option) any later version.
12+
*
13+
* This program is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* along with this program. If not, see http://www.gnu.org/licenses/.
20+
* ==========================License-End==============================
21+
*/
22+
23+
package org.lambda3.text.simplification.discourse.model;
24+
25+
import edu.stanford.nlp.trees.Tree;
26+
import org.lambda3.text.simplification.discourse.utils.IDGenerator;
27+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
28+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
29+
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
30+
import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
31+
32+
import java.util.ArrayList;
33+
import java.util.List;
34+
35+
/**
36+
*
37+
*/
38+
public class Element {
39+
private String id;
40+
private Tree parseTree;
41+
private int sentenceIdx;
42+
private int contextLayer;
43+
private List<SimpleContext> simpleContexts;
44+
private List<LinkedContext> linkedContexts;
45+
46+
// for deserialization
47+
public Element() {
48+
}
49+
50+
public Element(Tree parseTree, int sentenceIdx, int contextLayer) {
51+
this.id = IDGenerator.generateUUID();
52+
this.parseTree = parseTree;
53+
this.sentenceIdx = sentenceIdx;
54+
this.contextLayer = contextLayer;
55+
this.simpleContexts = new ArrayList<>();
56+
this.linkedContexts = new ArrayList<>();
57+
}
58+
59+
// not efficient -> prefer to use constructor with tree
60+
public Element(String text, int sentenceIdx, int contextLayer) throws ParseTreeException {
61+
this(ParseTreeParser.parse(text), sentenceIdx, contextLayer);
62+
}
63+
64+
public void addLinkedContext(LinkedContext context) {
65+
if (!linkedContexts.contains(context)) {
66+
linkedContexts.add(context);
67+
}
68+
}
69+
70+
public void addSimpleContext(SimpleContext context) {
71+
if (!simpleContexts.contains(context)) {
72+
simpleContexts.add(context);
73+
}
74+
}
75+
76+
public String getId() {
77+
return id;
78+
}
79+
80+
public Tree getParseTree() {
81+
return parseTree;
82+
}
83+
84+
public void setParseTree(Tree parseTree) {
85+
this.parseTree = parseTree;
86+
}
87+
88+
public String getText() {
89+
return WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(parseTree));
90+
}
91+
92+
public int getSentenceIdx() {
93+
return sentenceIdx;
94+
}
95+
96+
public int getContextLayer() {
97+
return contextLayer;
98+
}
99+
100+
public List<SimpleContext> getSimpleContexts() {
101+
return simpleContexts;
102+
}
103+
104+
public List<LinkedContext> getLinkedContexts() {
105+
return linkedContexts;
106+
}
107+
108+
@Override
109+
public String toString() {
110+
StringBuilder strb = new StringBuilder();
111+
strb.append(id + " " + contextLayer + " " + getText() + "\n");
112+
getSimpleContexts().forEach(c -> strb.append("\tS:" + c.getRelation() + " " + c.getText() + "\n"));
113+
getLinkedContexts().forEach(c -> strb.append("\tL:" + c.getRelation() + " " + c.getTargetID() + "\n"));
114+
return strb.toString();
115+
}
116+
}

0 commit comments

Comments
 (0)