
Commit f79c657

committed Aug 24, 2020
Merge branch 'development' of https://github.com/loklak/loklak_server.git into development
2 parents d6ea5ac + d866d7d commit f79c657

15 files changed: +66 / -699 lines changed
 

bin/upgrade.sh  (+3, -5)

@@ -1,9 +1,7 @@
 #!/usr/bin/env sh
 cd `dirname $0`/..
 echo "loading latest code changes"
-git pull origin master
-echo "clean up"
-./gradlew clean
-echo "building loklak"
-./gradlew build
+git pull -r
+echo "assembling loklak"
+./gradlew assemble
 bin/restart.sh

src/org/json/JSONObject.java  (+1, -1)

@@ -297,7 +297,7 @@ public JSONObject(JSONTokener x) throws JSONException {
      */
     public JSONObject(Map<?, ?> m) {
         if (m == null) {
-            this.map = new HashMap<String, Object>();
+            this.map = new LinkedHashMap<String, Object>();
         } else {
             this.map = new HashMap<String, Object>(m.size());
             for (final Entry<?, ?> e : m.entrySet()) {
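
Note on the change above: only the empty-map branch switches from HashMap to LinkedHashMap, presumably so that keys added to an initially empty JSONObject keep their insertion order when serialized; that intent is an assumption, the commit gives no rationale. A minimal standalone sketch of the behavioral difference (not loklak code):

    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    // Minimal sketch: LinkedHashMap iterates keys in insertion order,
    // HashMap gives no ordering guarantee at all.
    public class MapOrderDemo {
        public static void main(String[] args) {
            Map<String, Object> hashed = new HashMap<>();
            Map<String, Object> linked = new LinkedHashMap<>();
            for (String key : new String[] {"query", "timestamp", "created_at", "link"}) {
                hashed.put(key, Boolean.TRUE);
                linked.put(key, Boolean.TRUE);
            }
            System.out.println("HashMap key order (unspecified):     " + hashed.keySet());
            System.out.println("LinkedHashMap key order (insertion): " + linked.keySet());
        }
    }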

src/org/loklak/Caretaker.java  (+25, -21)

@@ -107,7 +107,7 @@ public void run() {
             if (SuggestServlet.cache.size() > 100) SuggestServlet.cache.clear();

             // sleep a bit to prevent that the DoS limit fires at backend server
-            try {Thread.sleep(busy ? 500 : 5000);} catch (InterruptedException e) {}
+            try {Thread.sleep(busy ? 500 : 10000);} catch (InterruptedException e) {}
             if (!this.shallRun) break beat;
             busy = false;

@@ -165,32 +165,36 @@ public void run() {
                 try {
                     pendingQueries = LoklakServer.harvester.get_harvest_queries();
                 } catch (IOException e) {
+                    DAO.severe("FAIL SUGGESTIONS cannot get queries from backend: " + e.getMessage(), e);
+                    break hloop;
                 }

-                // in case we have queries
-                if (pendingQueries > 0) {
-                    Thread[] rts = new Thread[Math.min(pendingQueries, retrieval_forbackend_concurrency)];
-                    final AtomicInteger acccount = new AtomicInteger(0);
-                    for (int j = 0; j < rts.length; j++) {
-                        rts[j] = new Thread() {
-                            public void run() {
-                                TwitterTimeline tl = LoklakServer.harvester.harvest_timeline();
-                                if (tl != null && tl.getQuery() != null) {
-                                    /* Thread t = */ LoklakServer.harvester.push_timeline_to_backend(tl);
-                                }
-                                int count = tl == null ? 0 : tl.size();
-                                acccount.addAndGet(count);
+                // without queries we cannot do harvesting
+                if (pendingQueries == 0) break hloop;
+
+                // start harvesting
+                Thread[] rts = new Thread[Math.min(pendingQueries, retrieval_forbackend_concurrency)];
+                final AtomicInteger acccount = new AtomicInteger(0);
+                for (int j = 0; j < rts.length; j++) {
+                    rts[j] = new Thread() {
+                        public void run() {
+                            TwitterTimeline tl = LoklakServer.harvester.harvest_timeline();
+                            if (tl != null && tl.getQuery() != null) {
+                                /* Thread t = */ LoklakServer.harvester.push_timeline_to_backend(tl);
                             }
-                        };
-                        rts[j].start();
-                        try {Thread.sleep(retrieval_forbackend_sleep_base + random.nextInt(retrieval_forbackend_sleep_randomoffset));} catch (InterruptedException e) {}
-                    }
-                    for (Thread t: rts) t.join();
-                    if (acccount.get() < 0) break hloop;
+                            int count = tl == null ? 0 : tl.size();
+                            acccount.addAndGet(count);
+                        }
+                    };
+                    rts[j].start();
                     try {Thread.sleep(retrieval_forbackend_sleep_base + random.nextInt(retrieval_forbackend_sleep_randomoffset));} catch (InterruptedException e) {}
                 }
+                for (Thread t: rts) t.join();
+                if (acccount.get() < 0) break hloop;
+                try {Thread.sleep(retrieval_forbackend_sleep_base + random.nextInt(retrieval_forbackend_sleep_randomoffset));} catch (InterruptedException e) {}
+
+                busy = true;
             }
-            busy = true;
             }

             // run some crawl steps
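
Note: the refactoring above replaces the "if (pendingQueries > 0) { ... }" wrapper with an early "break hloop" guard and keeps the existing pattern of at most retrieval_forbackend_concurrency worker threads that are started with a randomized delay and then joined. A standalone sketch of that pattern; the names MAX_CONCURRENCY and fetchOne() are illustrative stand-ins, not loklak APIs:

    import java.util.Random;
    import java.util.concurrent.atomic.AtomicInteger;

    public class BoundedHarvestDemo {
        static final int MAX_CONCURRENCY = 4;   // cf. retrieval_forbackend_concurrency
        static final Random random = new Random();

        static int fetchOne() {                 // stands in for harvest_timeline()
            return random.nextInt(10);
        }

        public static void main(String[] args) throws InterruptedException {
            int pendingQueries = 10;
            if (pendingQueries == 0) return;    // guard clause instead of a large if block

            Thread[] workers = new Thread[Math.min(pendingQueries, MAX_CONCURRENCY)];
            final AtomicInteger collected = new AtomicInteger(0);
            for (int j = 0; j < workers.length; j++) {
                workers[j] = new Thread(() -> collected.addAndGet(fetchOne()));
                workers[j].start();
                Thread.sleep(100 + random.nextInt(100)); // randomized delay between thread starts
            }
            for (Thread t : workers) t.join();           // wait for all workers before the next round
            System.out.println("collected messages: " + collected.get());
        }
    }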

src/org/loklak/LoklakServer.java  (-2)

@@ -104,7 +104,6 @@
 import org.loklak.api.p2p.HelloService;
 import org.loklak.api.p2p.PeersServlet;
 import org.loklak.api.p2p.PushServlet;
-import org.loklak.api.search.ConsoleService;
 import org.loklak.api.search.EventBriteCrawlerService;
 import org.loklak.api.search.GenericScraper;
 import org.loklak.api.search.GithubProfileScraper;
@@ -607,7 +606,6 @@ private static void setServerHandler(File dataFile){
                 HelloService.class,

                 // search
-                ConsoleService.class,
                 EventBriteCrawlerService.class,
                 MeetupsCrawlerService.class,
                 RSSReaderService.class,

src/org/loklak/api/search/ConsoleService.java  (-271)

This file was deleted.

src/org/loklak/data/DAO.java  (+8, -2)

@@ -1302,7 +1302,11 @@ public static ResultList<QueryEntry> SearchLocalQueries(final String q, final in
         ResultList<Map<String, Object>> result = elasticsearch_client.fuzzyquery(IndexName.queries.name(), "query", q, resultCount, sort_field, default_sort_type, sort_order, since, until, range_field);
         queries.setHits(result.getHits());
         for (Map<String, Object> map: result) {
-            queries.add(new QueryEntry(new JSONObject(map)));
+            QueryEntry qe = new QueryEntry(new JSONObject(map));
+            // check a flag value for queries that probably never get new messages
+            if (qe.getMessagePeriod() != QueryEntry.DAY_MILLIS) {
+                queries.add(qe);
+            }
         }
         return queries;
     }
@@ -1415,7 +1419,7 @@ public static TwitterTimeline scrapeTwitter(
             DAO.severe(e);
         }

-        if (recordQuery && Caretaker.acceptQuery4Retrieval(q)) {
+        if (recordQuery && Caretaker.acceptQuery4Retrieval(q) && tl.size() > 0) {
             if (qe == null) {
                 // a new query occurred
                 qe = new QueryEntry(q, timezoneOffset, tl.period(), SourceType.TWITTER, byUserQuery);
@@ -1550,6 +1554,7 @@ public static ArrayList<String> getFrontPeers() {
         for (String peer: remote) testpeers.add(peer);
         return testpeers;
         }
+        /*
         if (frontPeerCache.size() == 0) {
             // add dynamically all peers that contacted myself
             for (Map<String, RemoteAccess> hmap: RemoteAccess.history.values()) {
@@ -1559,6 +1564,7 @@
             }
         }
         testpeers.addAll(frontPeerCache);
+        */
         return getBestPeers(testpeers);
     }

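
Note: together with the new QueryEntry.getMessagePeriod() getter (see the QueryEntry.java diff further below), SearchLocalQueries now drops queries whose message_period equals the DAY_MILLIS flag value, i.e. queries that probably never get new messages. A small sketch of that sentinel-value filter; the Query class here is a stand-in for illustration, not org.loklak.objects.QueryEntry:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class QueryFilterDemo {
        // same flag value as QueryEntry.DAY_MILLIS
        static final long DAY_MILLIS = 1000L * 60L * 60L * 24L;

        // stand-in for org.loklak.objects.QueryEntry
        static class Query {
            final String q;
            final long messagePeriod;
            Query(String q, long messagePeriod) { this.q = q; this.messagePeriod = messagePeriod; }
            public String toString() { return this.q; }
        }

        static List<Query> activeQueries(List<Query> all) {
            List<Query> result = new ArrayList<>();
            for (Query query : all) {
                // DAY_MILLIS marks queries that probably never get new messages; skip them
                if (query.messagePeriod != DAY_MILLIS) result.add(query);
            }
            return result;
        }

        public static void main(String[] args) {
            List<Query> all = Arrays.asList(new Query("fossasia", 60000L), new Query("dead topic", DAY_MILLIS));
            System.out.println(activeQueries(all)); // prints [fossasia]
        }
    }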

src/org/loklak/harvester/TwitterHarvester.java  (+1)

@@ -120,6 +120,7 @@ public TwitterTimeline harvest_timeline() {

         if (tl != null && tl.size() > 0) {
             // find content query strings and store them in the context cache
+            tl.setQuery(q);
             checkContext(tl, true);
         }


src/org/loklak/harvester/TwitterScraper.java  (+7, -5)

@@ -218,7 +218,7 @@ private static TwitterTimeline[] search(

             // parse
             Elements items = doc.getElementsByClass("stream-item");
-            for (int itemc = 0; itemc < items.size(); itemc++) {
+            itemloop: for (int itemc = 0; itemc < items.size(); itemc++) {
                 Element item = items.get(itemc);
                 if (debuglog) System.out.println(item.toString());

@@ -258,10 +258,12 @@ private static TwitterTimeline[] search(
                 }

                 String tweettimes = timestamp.attr("data-time-ms");
+                if (tweettimes.length() == 0) continue itemloop; // sometimes tweets are not available any more
                 long tweettime = Long.parseLong(tweettimes);
-                long snowflaketime = snowflake2millis(Long.parseLong(tweetID));
-                assert tweettime / 1000 == snowflaketime / 1000;
-
+                // this assertion holds in most, but unfortunately not in all cases, so it is commented out
+                // long snowflaketime = snowflake2millis(Long.parseLong(tweetID));
+                // assert tweettime / 1000L == snowflaketime / 1000L : "tweettime = " + tweettime + ", snowflaketime = " + snowflaketime;
+
                 Elements reply = item.getElementsByClass("ProfileTweet-action--reply").get(0).children();
                 Elements retweet = item.getElementsByClass("ProfileTweet-action--retweet").get(0).children();
                 Elements favourite = item.getElementsByClass("ProfileTweet-action--favorite").get(0).children();
@@ -747,7 +749,7 @@ public Post toJSON(final UserEntry user, final boolean calculatedData, final int
         // the tweet; the cleanup is a helper function which cleans mistakes from the past in scraping
         MessageEntry.TextLinkMap tlm = this.moreData.getText(iflinkexceedslength, urlstub, this.text, this.getLinks(), this.getPostId());
         this.put("text", tlm);
-        if (this.status_id_url != null) this.put("link", this.status_id_url.toExternalForm());
+        if (this.status_id_url != null) this.put("link", this.status_id_url.toExternalForm()); // this is the primary key for retrieval in elasticsearch
         this.put("id_str", this.postId);
         this.put("conversation_id", this.conversationID);
         this.put("conversation_user", this.conversationUserIDs);
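
Note: the assertion removed above compared the page's data-time-ms attribute with a creation time derived from the tweet id. For reference, a sketch of the commonly documented Twitter snowflake layout that a helper such as snowflake2millis() presumably implements (an assumption; the loklak implementation itself is not part of this diff): the upper bits of the 64-bit id encode milliseconds since the Twitter epoch.

    public class SnowflakeDemo {
        // Twitter snowflake epoch (2010-11-04), as documented for the snowflake id scheme
        static final long TWITTER_EPOCH_MILLIS = 1288834974657L;

        // the lowest 22 bits hold worker id and sequence; the remaining bits are a millisecond timestamp
        static long snowflakeToMillis(long id) {
            return (id >>> 22) + TWITTER_EPOCH_MILLIS;
        }

        public static void main(String[] args) {
            long id = 1297626929538482176L; // illustrative id, not a real tweet
            System.out.println(new java.util.Date(snowflakeToMillis(id))); // roughly August 2020
        }
    }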

src/org/loklak/http/ClientConnection.java  (+12, -15)

@@ -421,24 +421,21 @@ public static void download(String source_url, File target_file) {
     }

     public static byte[] download(String source_url) throws IOException {
+        ClientConnection connection = new ClientConnection(source_url, "");
+        if (connection.inputStream == null) return null;
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        int count;
+        byte[] buffer = new byte[4096];
+        IOException ee = null;
         try {
-            ClientConnection connection = new ClientConnection(source_url, "");
-            if (connection.inputStream == null) return null;
-            ByteArrayOutputStream baos = new ByteArrayOutputStream();
-            int count;
-            byte[] buffer = new byte[4096];
-            try {
-                while ((count = connection.inputStream.read(buffer)) > 0) baos.write(buffer, 0, count);
-            } catch (IOException e) {
-                DAO.severe(e);
-            } finally {
-                connection.close();
-            }
-            return baos.toByteArray();
+            while ((count = connection.inputStream.read(buffer)) > 0) baos.write(buffer, 0, count);
         } catch (IOException e) {
-            DAO.severe(e);
-            return null;
+            ee = e;
+        } finally {
+            connection.close();
         }
+        if (ee != null) throw ee;
+        return baos.toByteArray();
     }

     public int getStatusCode() {
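
Note: the rewritten download() above creates the connection before the try block, remembers a possible IOException, always closes the connection in the finally block, and rethrows afterwards instead of swallowing the error. The same contract can also be written with try-with-resources; a sketch under the assumption that the resource is an ordinary InputStream (the real ClientConnection is not), so this is illustrative only:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;

    public final class DownloadSketch {

        // same contract as the rewritten download(): close in all cases, propagate the IOException
        public static byte[] download(InputStream in) throws IOException {
            if (in == null) return null;
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] buffer = new byte[4096];
            int count;
            try (InputStream stream = in) { // closed whether or not read() throws
                while ((count = stream.read(buffer)) > 0) baos.write(buffer, 0, count);
            }                               // an IOException simply propagates to the caller
            return baos.toByteArray();
        }

        public static void main(String[] args) throws IOException {
            byte[] payload = "hello loklak".getBytes("UTF-8");
            System.out.println(download(new ByteArrayInputStream(payload)).length); // prints 12
        }
    }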

src/org/loklak/objects/AbstractObjectEntry.java  (+2, -2)

@@ -35,8 +35,8 @@

 public abstract class AbstractObjectEntry extends Post implements ObjectEntry {

-    public final static String TIMESTAMP_FIELDNAME = "timestamp";
-    public final static String CREATED_AT_FIELDNAME = "created_at";
+    public final static String TIMESTAMP_FIELDNAME = "timestamp"; // the harvesting time, NOT used for identification
+    public final static String CREATED_AT_FIELDNAME = "created_at"; // the tweet time as embedded in the tweet, not used for identification either

     public AbstractObjectEntry() {
     }

src/org/loklak/objects/QueryEntry.java  (+6, -2)

@@ -64,7 +64,7 @@
  */
 public class QueryEntry extends AbstractObjectEntry implements ObjectEntry {

-    private final static long DAY_MILLIS = 1000L * 60L * 60L * 24L;
+    public final static long DAY_MILLIS = 1000L * 60L * 60L * 24L;
     private final static int RETRIEVAL_CONSTANT = 20; // the number of messages that we get with each retrieval at maximum

     protected String query; // the query in the exact way as the user typed it in
@@ -144,7 +144,7 @@ public void update(final long message_period, final boolean byUserQuery) {
             this.query_last = this.retrieval_last;
         }
         long new_message_period = message_period; // can be Long.MAX_VALUE if less than 2 messages are in timeline!
-        int new_messages_per_day = (int) (DAY_MILLIS / new_message_period); // this is an interpolation based on the last tweet list, can be 0!
+        int new_messages_per_day = (int) (DAY_MILLIS / (new_message_period + 1)); // this is an interpolation based on the last tweet list, can be 0!
         if (new_message_period == Long.MAX_VALUE || new_messages_per_day == 0) {
             this.message_period = DAY_MILLIS;
         } else {
@@ -217,6 +217,10 @@ public int getQueryCount() {
     public int getRetrievalCount() {
         return this.retrieval_count;
     }
+
+    public long getMessagePeriod() {
+        return this.message_period;
+    }

     public int getMessagesPerDay() {
         return this.messages_per_day;
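
Note: the "+ 1" in the divisor above guards against a division by zero. message_period can be 0 when two messages carry the same millisecond timestamp, and the old expression DAY_MILLIS / new_message_period then threw an ArithmeticException. A worked sketch of the interpolation (standalone, not the loklak class):

    public class MessagesPerDayDemo {
        static final long DAY_MILLIS = 1000L * 60L * 60L * 24L;

        // interpolation of "messages per day" from the observed period between messages
        static int messagesPerDay(long messagePeriodMillis) {
            return (int) (DAY_MILLIS / (messagePeriodMillis + 1));
        }

        public static void main(String[] args) {
            System.out.println(messagesPerDay(0L));             // same-millisecond tweets: 86400000, no exception
            System.out.println(messagesPerDay(60000L));         // one tweet per minute: 1439
            System.out.println(messagesPerDay(Long.MAX_VALUE)); // sentinel input: +1 overflows, division yields 0
        }
    }

In update(), both the Long.MAX_VALUE case and a computed 0 collapse to the DAY_MILLIS flag value that the DAO.java change filters on.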

src/org/loklak/objects/TwitterTimeline.java  (-4)

@@ -131,10 +131,6 @@ public JSONObject toJSON(boolean withEnrichedData, String metadata_field_name, S
         return json;
     }

-    public SusiThought toSusi(boolean withEnrichedData) throws JSONException {
-        return toSusi(withEnrichedData, new SusiThought());
-    }
-
     private SusiThought toSusi(boolean withEnrichedData, SusiThought json) throws JSONException {
         json
             .setQuery(this.query)

src/org/loklak/susi/SusiProcedures.java  (-71)

This file was deleted.

src/org/loklak/susi/SusiThought.java  (+1, -97)

@@ -20,9 +20,6 @@

 package org.loklak.susi;

-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
 import org.json.JSONArray;
 import org.json.JSONObject;

@@ -44,37 +41,6 @@ public SusiThought() {
         this.data_name = "data";
     }

-    /**
-     * create a clone of a json object as a SusiThought object
-     * @param json the 'other' thought, probably an exported and re-imported thought
-     */
-    public SusiThought(JSONObject json) {
-        this();
-        if (json.has(this.metadata_name)) this.put(this.metadata_name, json.getJSONObject(this.metadata_name));
-        if (json.has(this.data_name)) this.setData(json.getJSONArray(this.data_name));
-        if (json.has("actions")) this.put("actions", json.getJSONArray("actions"));
-    }
-
-    /**
-     * Create an initial thought using the matcher on an expression.
-     * Such an expression is like the input from a text source which contains keywords
-     * that are essential for the thought. The matcher extracts such information.
-     * Matching informations are named using the order of the appearance of the information pieces.
-     * The first information is named '1', the second '2' and so on. The whole information which contained
-     * the matching information is named '0'.
-     * @param matcher
-     */
-    public SusiThought(Matcher matcher) {
-        this();
-        this.setOffset(0).setHits(1);
-        JSONObject row = new JSONObject();
-        row.put("0", matcher.group(0));
-        for (int i = 0; i < matcher.groupCount(); i++) {
-            row.put(Integer.toString(i + 1), matcher.group(i + 1));
-        }
-        this.setData(new JSONArray().put(row));
-    }
-
     @Deprecated
     public SusiThought(String metadata_name, String data_name) {
         super(true);
@@ -202,67 +168,5 @@ public JSONArray getData() {
         this.put(data_name, a);
         return a;
     }
-
-    /**
-     * Merging of data is required during an mind-meld.
-     * To meld two thoughts, we combine their data arrays into one.
-     * The resulting table has the maximum length of the source tables
-     * @param table the information to be melted into our existing table.
-     * @return the thought
-     */
-    public SusiThought mergeData(JSONArray table1) {
-        JSONArray table0 = this.getData();
-        while (table0.length() < table1.length()) table0.put(new JSONObject());
-        for (int i = 0; i < table1.length(); i++) {
-            table0.getJSONObject(i).putAll(table1.getJSONObject(i));
-        }
-        setData(table0);
-        return this;
-    }
-
-    /**
-     * If during thinking we observe something that we want to memorize, we can memorize this here
-     * @param featureName the object key
-     * @param observation the object value
-     * @return the thought
-     */
-    public SusiThought addObservation(String featureName, String observation) {
-        JSONArray data = getData();
-        for (int i = 0; i < data.length(); i++) {
-            JSONObject spark = data.getJSONObject(i);
-            if (!spark.has(featureName)) {
-                spark.put(featureName, observation);
-                return this;
-            }
-        }
-        data.put(new JSONObject().put(featureName, observation));
-        return this;
-    }
-
-    public static final Pattern variable_pattern = Pattern.compile("\\$.*?\\$");
-
-    /**
-     * Unification applies a piece of memory within the current argument to a statement
-     * which creates an instantiated statement
-     * @param statement
-     * @return the instantiated statement with elements of the argument applied as much as possible
-     */
-    public String unify(String statement) {
-        JSONArray table = this.getData();
-        if (table != null && table.length() > 0) {
-            JSONObject row = table.getJSONObject(0);
-            for (String key: row.keySet()) {
-                int i = statement.indexOf("$" + key + "$");
-                if (i >= 0) {
-                    statement = statement.substring(0, i) + row.get(key).toString() + statement.substring(i + key.length() + 2);
-                }
-            }
-        }
-        return statement;
-    }
-
-    public static void main(String[] args) {
-        SusiThought t = new SusiThought().addObservation("a", "letter-a");
-        System.out.println(t.unify("the letter $a$"));
-    }
+
 }

src/org/loklak/susi/SusiTransfer.java  (-201)

This file was deleted.
