Skip to content

Commit

Permalink
forum scrape
Browse files Browse the repository at this point in the history
  • Loading branch information
xdnw committed Jul 15, 2023
1 parent 532b907 commit 300bd46
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ public boolean run(Guild guild, IMessageIO channel, final User msgUser, String c
}
if (result != null && !result.isEmpty()) {
result = result.replaceAll("(?i)" + Settings.INSTANCE.API_KEY_PRIMARY, "XXX");
result = result.replaceAll("(?i)(?<=^|[^A-Fa-f0-9])(?:[0-9a-f]{2}){7,}(?=[^A-Fa-f0-9]|$)", "XXX");
// result = result.replaceAll("(?i)(?<=^|[^A-Fa-f0-9])(?:[0-9a-f]{2}){7,}(?=[^A-Fa-f0-9]|$)", "XXX");
channel.send(result);
}
} catch (Throwable e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ public String announce(@Me GuildDB db, @Me Guild guild, @Me JSONObject command,
}
}

List<String> replacementLines = Arrays.asList(replacements.split("\n"));
List<String> replacementLines = Arrays.asList(replacements.split("(?<!\\\\\\\\)\\\\n|\\\\\\\\n"));
System.out.println(replacementLines);

Random random = seed == null ? new Random() : new Random(seed);

Expand Down
115 changes: 115 additions & 0 deletions src/main/java/link/locutus/discord/db/ForumDB.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package link.locutus.discord.db;

import link.locutus.discord.db.entities.DBComment;
import link.locutus.discord.db.entities.DBTopic;
import link.locutus.discord.util.RateLimitUtil;
import link.locutus.discord.util.io.PagePriority;
import link.locutus.discord.util.scheduler.ThrowingConsumer;
Expand All @@ -17,11 +18,16 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
Expand All @@ -42,6 +48,8 @@ public ForumDB(Guild guild) throws SQLException, ClassNotFoundException {
public boolean update() {
try {
List<DBComment> comments = updateAndGetNewComments();
if (guild == null) return true;

List<Category> purgeCategories = new ArrayList<>();
for (DBComment comment : comments) {

Expand Down Expand Up @@ -105,6 +113,113 @@ public void createTables() {
e.printStackTrace();
}
};
{
String nations = "CREATE TABLE IF NOT EXISTS `FORUM_TOPICS` (`topic_id` INT PRIMARY KEY, `section_id` INT NOT NULL, `topic_name` VARCHAR NOT NULL, `topic_urlname` VARCHAR NOT NULL, `section_name` VARCHAR NOT NULL, `section_urlname` VARCHAR NOT NULL, `timestamp` BIGINT NOT NULL, `poster_id` INT NOT NULL, `poster_name` VARCHAR NOT NULL)";
try (Statement stmt = getConnection().createStatement()) {
stmt.addBatch(nations);
stmt.executeBatch();
stmt.clearBatch();
} catch (SQLException e) {
e.printStackTrace();
}
};
}

/**
 * Inserts one forum topic as a new row of the {@code FORUM_TOPICS} table.
 *
 * <p>NOTE(review): the table declares {@code topic_id} as its primary key, so inserting a
 * topic that is already stored will presumably fail with an {@link SQLException} — confirm
 * whether callers rely on that.
 *
 * @param topic the topic whose fields are bound, in column order, to the insert statement
 * @throws SQLException if the statement cannot be prepared or executed
 */
public void addTopic(DBTopic topic) throws SQLException {
    String insertSql = "INSERT INTO `FORUM_TOPICS` (`topic_id`, `section_id`, `topic_name`, `topic_urlname`, `section_name`, `section_urlname`, `timestamp`, `poster_id`, `poster_name`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
    try (PreparedStatement insert = getConnection().prepareStatement(insertSql)) {
        // JDBC parameter indexes are 1-based; pre-increment before every bind so the
        // binds below stay aligned with the column list above.
        int column = 0;
        insert.setInt(++column, topic.topic_id);
        insert.setInt(++column, topic.section_id);
        insert.setString(++column, topic.topic_name);
        insert.setString(++column, topic.topic_urlname);
        insert.setString(++column, topic.section_name);
        insert.setString(++column, topic.section_urlname);
        insert.setLong(++column, topic.timestamp);
        insert.setInt(++column, topic.poster_id);
        insert.setString(++column, topic.poster_name);
        insert.execute();
    }
}

/**
 * Downloads the resource at {@code requestURL} and returns its body as one string.
 *
 * <p>The body is decoded as UTF-8 rather than the platform default charset
 * (NOTE(review): assumes the server answers in UTF-8 — confirm). Line breaks are kept:
 * every line read is terminated with {@code '\n'}, so words that sit on consecutive
 * lines of the page are not glued together.
 *
 * @param requestURL absolute URL to fetch
 * @return the response body, each line terminated by {@code '\n'}
 * @throws IOException if the URL is malformed, or the connection fails or times out
 */
private static String get(String requestURL) throws IOException {
    URL website = new URL(requestURL);
    URLConnection connection = website.openConnection();
    // URLConnection's default timeout of 0 means "wait indefinitely"; bound both phases
    // so a stalled server cannot hang the caller forever.
    connection.setConnectTimeout(30_000);
    connection.setReadTimeout(30_000);
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
            connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {

        StringBuilder response = new StringBuilder();
        String inputLine;

        while ((inputLine = in.readLine()) != null) {
            // readLine() strips the terminator; put one back to preserve line boundaries.
            response.append(inputLine).append('\n');
        }
        return response.toString();
    }
}

/**
 * Scrapes the topic listing of one forum section and stores every topic found.
 *
 * <p>Starting at page 1, each listing page is downloaded, every element matching
 * {@code .ipsType_break.ipsContained} is parsed into a {@link DBTopic} and passed to
 * {@link #addTopic(DBTopic)}. Scraping continues while the last pagination entry on the
 * page links to a page number higher than the current one.
 *
 * @param section_id   numeric id of the forum section (used in the URL and stored on each topic)
 * @param section_name URL name of the section; it is stored as both the section name and the
 *                     section URL name of every topic
 * @throws SQLException          if storing a topic fails
 * @throws IOException           if a page cannot be downloaded
 * @throws IllegalStateException if a topic row or pagination link does not have the expected format
 */
public void scrapeTopic(int section_id, String section_name) throws SQLException, IOException {
    String baseUrl = "https://forum.politicsandwar.com/index.php?/forum/" + section_id + "-" + section_name + "/";
    int page = 1;
    while (true) {
        String url = baseUrl + "page/" + page + "/";
        Document html = Jsoup.parse(get(url));

        for (Element elem : html.select(".ipsType_break.ipsContained")) {
            DBTopic topic = parseTopicRow(elem, section_id, section_name, url);
            System.out.println("Saving: " + topic.topic_name);
            addTopic(topic);
        }

        // Stop once the pagination no longer advertises a page beyond the current one
        // (or the page has no pagination at all, reported as 0).
        if (highestListedPage(html, url) <= page) {
            break;
        }
        page++;
    }
}

/**
 * Builds a {@link DBTopic} from one topic element of a section listing page.
 * Any parsing failure is rethrown with the page URL and the offending attribute values.
 */
private static DBTopic parseTopicRow(Element elem, int section_id, String section_name, String pageUrl) {
    Elements links = elem.select("a");
    String topicUrl = links.attr("href");
    String topicName = links.text();

    String dateStr = elem.select("time").attr("datetime");

    // Poster link, e.g. ".../profile/611-prefontaine/" -> id 611, name from the link text.
    Elements poster = elem.select("a[href^='https://forum.politicsandwar.com/index.php?/profile/']");
    String posterName = poster.text();
    String posterUrl = poster.attr("href");

    try {
        // The part after "topic/" looks like "<id>-<urlname>/...": split once into id and the rest.
        String[] idAndRest = topicUrl.split("topic/")[1].split("-", 2);
        int topicId = Integer.parseInt(idAndRest[0]);
        String topicUrlName = idAndRest[1].split("/")[0];

        long timestamp = Instant.parse(dateStr).toEpochMilli();
        int posterId = Integer.parseInt(posterUrl.split("profile/")[1].split("-")[0]);

        // Only the section's URL name is known here, so it fills both section fields.
        return new DBTopic(topicId, section_id, topicName, topicUrlName, section_name, section_name, timestamp, posterId, posterName);
    } catch (RuntimeException e) {
        // Index, number-format and date-parse failures all mean the markup was not what we expected;
        // surface them with enough context to locate the offending row.
        throw new IllegalStateException("Unexpected topic markup on " + pageUrl
                + " (topic href=`" + topicUrl + "`, datetime=`" + dateStr + "`, poster href=`" + posterUrl + "`)", e);
    }
}

/**
 * Returns the page number linked by the last {@code li.ipsPagination_page} entry of the
 * document, or {@code 0} when the document has no such entry.
 */
private static int highestListedPage(Document html, String pageUrl) {
    Elements pages = html.select("li.ipsPagination_page");
    if (pages.isEmpty()) {
        return 0;
    }
    String href = pages.get(pages.size() - 1).select("a").attr("href");
    try {
        return Integer.parseInt(href.split("page/")[1].split("/")[0]);
    } catch (RuntimeException e) {
        throw new IllegalStateException("Unexpected pagination link on " + pageUrl + " (href=`" + href + "`)", e);
    }
}

/**
 * Manual entry point: creates a {@code ForumDB} without a guild and scrapes the topics of
 * forum section 42 ("alliance-affairs").
 *
 * @param args unused
 */
public static void main(String[] args) throws SQLException, ClassNotFoundException, IOException {
    final int sectionId = 42;
    final String sectionUrlName = "alliance-affairs";

    new ForumDB(null).scrapeTopic(sectionId, sectionUrlName);
}

public List<DBComment> updateAndGetNewComments() throws IOException {
Expand Down
24 changes: 13 additions & 11 deletions src/main/java/link/locutus/discord/db/NationDB.java
Original file line number Diff line number Diff line change
Expand Up @@ -2958,17 +2958,19 @@ private void importLegacyNationLoot(boolean fromAttacks) throws SQLException {
getDb().drop("NATION_LOOT");
}

Map<Integer, Map.Entry<Long, double[]>> nationLoot = Locutus.imp().getWarDb().getNationLootFromAttacksLegacy();
for (Map.Entry<Integer, Map.Entry<Long, double[]>> entry : nationLoot.entrySet()) {
int nationId = entry.getKey();
long date = entry.getValue().getKey();
double[] loot = entry.getValue().getValue();
NationLootType type = NationLootType.WAR_LOSS;
lootInfoList.add(new LootEntry(nationId, loot, date, type));
}

if (!lootInfoList.isEmpty()) {
saveNationLoot(lootInfoList);
if (fromAttacks) {
Map<Integer, Map.Entry<Long, double[]>> nationLoot = Locutus.imp().getWarDb().getNationLootFromAttacksLegacy();
for (Map.Entry<Integer, Map.Entry<Long, double[]>> entry : nationLoot.entrySet()) {
int nationId = entry.getKey();
long date = entry.getValue().getKey();
double[] loot = entry.getValue().getValue();
NationLootType type = NationLootType.WAR_LOSS;
lootInfoList.add(new LootEntry(nationId, loot, date, type));
}

if (!lootInfoList.isEmpty()) {
saveNationLoot(lootInfoList);
}
}
}

Expand Down
26 changes: 26 additions & 0 deletions src/main/java/link/locutus/discord/db/entities/DBTopic.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package link.locutus.discord.db.entities;

/**
 * Immutable value holder describing one forum topic: the topic itself, the section it
 * belongs to, and the user who posted it. All fields are assigned once by the constructor.
 */
public class DBTopic {
    // --- topic ---
    /** Numeric id of the topic. */
    public final int topic_id;
    /** Numeric id of the section containing the topic. */
    public final int section_id;
    /** Title of the topic. */
    public final String topic_name;
    /** Name of the topic as it appears in its URL. */
    public final String topic_urlname;

    // --- section ---
    /** Name of the section. */
    public final String section_name;
    /** Name of the section as it appears in its URL. */
    public final String section_urlname;

    // --- posting ---
    /** Time associated with the topic (presumably epoch milliseconds — verify against callers). */
    public final long timestamp;
    /** Numeric id of the posting user. */
    public final int poster_id;
    /** Name of the posting user. */
    public final String poster_name;

    /**
     * Creates a topic record; every argument is stored unchanged in the field of the same name.
     */
    public DBTopic(int topic_id, int section_id, String topic_name, String topic_urlname, String section_name, String section_urlname, long timestamp, int poster_id, String poster_name) {
        // numeric identifiers and time
        this.topic_id = topic_id;
        this.section_id = section_id;
        this.poster_id = poster_id;
        this.timestamp = timestamp;

        // display and URL names
        this.topic_name = topic_name;
        this.topic_urlname = topic_urlname;
        this.section_name = section_name;
        this.section_urlname = section_urlname;
        this.poster_name = poster_name;
    }

}
74 changes: 38 additions & 36 deletions src/main/java/link/locutus/discord/db/guild/GuildKey.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,53 +115,55 @@ public Set<Integer> validate(GuildDB db, Set<Integer> aaIds) {
if (ownerNation == null || ownerNation.getAlliance_id() != aaId || ownerNation.getPosition() < Rank.LEADER.id) {
Set<String> inviteCodes = new HashSet<>();
boolean isValid = Roles.ADMIN.hasOnRoot(owner.getUser());
try {
if (!isValid) {
try {
List<Invite> invites = RateLimitUtil.complete(db.getGuild().retrieveInvites());
for (Invite invite : invites) {
String inviteCode = invite.getCode();
inviteCodes.add(inviteCode);
}
} catch (InsufficientPermissionException ignore) {
}

if (!inviteCodes.isEmpty() && alliance.getDiscord_link() != null && !alliance.getDiscord_link().isEmpty()) {
for (String code : inviteCodes) {
if (alliance.getDiscord_link().contains(code)) {
isValid = true;
break;
try {
List<Invite> invites = RateLimitUtil.complete(db.getGuild().retrieveInvites());
for (Invite invite : invites) {
String inviteCode = invite.getCode();
inviteCodes.add(inviteCode);
}
} catch (Throwable ignore) {
}
}

if (!isValid) {
String url = Settings.INSTANCE.PNW_URL() + "/alliance/id=" + aaId;
String content = FileUtil.readStringFromURL(PagePriority.ALLIANCE_ID_AUTH_CODE.ordinal(), url);
String idStr = db.getGuild().getId();

if (!content.contains(idStr)) {
for (String inviteCode : inviteCodes) {
if (content.contains(inviteCode)) {
if (!inviteCodes.isEmpty() && alliance.getDiscord_link() != null && !alliance.getDiscord_link().isEmpty()) {
for (String code : inviteCodes) {
if (alliance.getDiscord_link().contains(code)) {
isValid = true;
break;
}
}
} else {
isValid = true;
}
}

if (!isValid) {
String msg = "1. Go to: <" + Settings.INSTANCE.PNW_URL() + "/alliance/edit/id=" + aaId + ">\n" +
"2. Scroll down to where it says Alliance Description:\n" +
"3. Put your guild id `" + db.getIdLong() + "` somewhere in the text\n" +
"4. Click save\n" +
"5. Run the command " + getCommandObj(aaIds) + " again\n" +
"(note: you can remove the id after setup)";
throw new IllegalArgumentException(msg);
if (!isValid) {
String url = Settings.INSTANCE.PNW_URL() + "/alliance/id=" + aaId;
String content = FileUtil.readStringFromURL(PagePriority.ALLIANCE_ID_AUTH_CODE.ordinal(), url);
String idStr = db.getGuild().getId();

if (!content.contains(idStr)) {
for (String inviteCode : inviteCodes) {
if (content.contains(inviteCode)) {
isValid = true;
break;
}
}
} else {
isValid = true;
}
}

if (!isValid) {
String msg = "1. Go to: <" + Settings.INSTANCE.PNW_URL() + "/alliance/edit/id=" + aaId + ">\n" +
"2. Scroll down to where it says Alliance Description:\n" +
"3. Put your guild id `" + db.getIdLong() + "` somewhere in the text\n" +
"4. Click save\n" +
"5. Run the command " + getCommandObj(aaIds) + " again\n" +
"(note: you can remove the id after setup)";
throw new IllegalArgumentException(msg);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/link/locutus/discord/gpt/GPTUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public static int getTokens(String input, ModelType type) {
Encoding enc = registry.getEncodingForModel(type);
return enc.encode(input).size();
}

public static List<String> getChunks(String input, ModelType type, int tokenSizeCap) {
List<String> result = new ArrayList<>();

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/link/locutus/discord/gpt/GptHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public GptHandler() throws SQLException, ClassNotFoundException {
this.embeddingDatabase = new AdaEmbedding(registry, service);
// TODO change ^ that to mini

File gpt4freePath = new File("../gpt4free/gpt3_5_turbo.py");
File gpt4freePath = new File("../gpt4free/my_project/gpt3_5_turbo.py");
File venvExe = new File("../gpt4free/venv/Scripts/python.exe");
// ensure files exist
if (!gpt4freePath.exists()) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/link/locutus/discord/util/StringMan.java
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ public static Set<String> enumerateReplacements(String announcement, List<String
for (int i = 0; i < replacements.size(); i++) {
String replacement = replacements.get(i);
String[] split = replacement.split("\\|");
if (split.length < 2) throw new IllegalArgumentException("Term `" + split[0] + "` has no replacement options");
if (split.length < 2) throw new IllegalArgumentException("Term `" + split[0] + "` has no replacement options " + " | `" + replacement + "` (" + StringMan.getString(split) + ")");
String search = split[0];
if (search.isEmpty()) throw new IllegalArgumentException("Search term " + i + " is empty");
if (announcement.indexOf(search) <= 0) throw new IllegalArgumentException("No match found for `" + search + "`");
Expand Down

0 comments on commit 300bd46

Please sign in to comment.