-
-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'add-xls-analyzer' into develop
- Loading branch information
Showing
13 changed files
with
212 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
require "excel_analyzer/analyzer" | ||
require "excel_analyzer/xls_analyzer" | ||
require "excel_analyzer/xlsx_analyzer" | ||
require "excel_analyzer/railtie" if defined?(Rails) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
require "open3" | ||
require "tempfile" | ||
require "tmpdir" | ||
|
||
require "active_storage" | ||
require "active_storage/analyzer" | ||
|
||
require "excel_analyzer/probe" | ||
|
||
module ExcelAnalyzer
  ##
  # The XlsAnalyzer class is responsible for analyzing legacy Excel (.xls)
  # files uploaded through Active Storage.
  #
  # Files are first converted to XLSX format with LibreOffice (+soffice+) and
  # then probed for hidden data.
  #
  class XlsAnalyzer < ActiveStorage::Analyzer
    include ExcelAnalyzer::Probe

    CONTENT_TYPE = "application/vnd.ms-excel"

    # Returns true when the blob's content type equals CONTENT_TYPE.
    def self.accept?(blob)
      blob.content_type == CONTENT_TYPE
    end

    # Returns the analysis result nested under the +:excel+ key.
    def metadata
      { excel: excel_metadata }
    end

    private

    # Downloads the blob, converts it and probes the result. Any StandardError
    # raised along the way is reported as <tt>{ error: message }</tt> instead
    # of propagating to the caller.
    def excel_metadata
      download_blob_to_tempfile(&method(:convert_and_probe))
    rescue StandardError => ex
      { error: ex.message }
    end

    # Converts +io+ to XLSX, probes the converted file and always disposes of
    # the intermediate tempfile afterwards.
    #
    # NOTE(review): assumes +probe+ (presumably from ExcelAnalyzer::Probe)
    # finishes reading the file before it returns - confirm.
    def convert_and_probe(io)
      converted = convert(io)
      probe(converted)
    ensure
      # Close and unlink right away rather than waiting for the finalizer.
      converted.close! if converted
    end

    # Converts the file at +io.path+ to XLSX using LibreOffice and returns a
    # rewound, binary-mode Tempfile holding the converted content.
    #
    # Raises RuntimeError when +soffice+ is not installed or when the
    # conversion does not produce the expected output file.
    def convert(io)
      raise 'LibreOffice (soffice) command not found' unless soffice_installed?

      Dir.mktmpdir do |tmpdir|
        # Passing an argument list bypasses the shell, so paths containing
        # spaces or shell metacharacters are handed to soffice verbatim.
        _stdout, _stderr, status = Open3.capture3(
          "soffice", "--headless", "--convert-to", "xlsx",
          "--outdir", tmpdir, io.path
        )

        path = File.join(tmpdir, "#{File.basename(io.path, ".*")}.xlsx")

        if !status.success? || !File.exist?(path)
          raise "LibreOffice conversion failed"
        end

        # The output must be copied out of tmpdir because mktmpdir removes the
        # directory when this block returns.
        Tempfile.new.tap do |tempfile|
          # XLSX is a binary (ZIP) format; avoid any text-mode translation.
          tempfile.binmode
          IO.copy_stream(path, tempfile)
          tempfile.rewind
        end
      end
    end

    # Returns a truthy value when a +soffice+ executable is found on the PATH.
    def soffice_installed?
      system("which soffice > /dev/null 2>&1")
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
require "active_storage" | ||
require "active_storage/analyzer" | ||
|
||
require "excel_analyzer/probe" | ||
|
||
module ExcelAnalyzer
  ##
  # The XlsxAnalyzer class analyzes Excel (.xlsx) files uploaded through
  # Active Storage by probing the downloaded file directly.
  #
  class XlsxAnalyzer < ActiveStorage::Analyzer
    include ExcelAnalyzer::Probe

    CONTENT_TYPE =
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    class << self
      # Returns true when the blob's content type equals CONTENT_TYPE.
      def accept?(blob)
        blob.content_type == CONTENT_TYPE
      end
    end

    # Returns the analysis result nested under the +:excel+ key.
    def metadata
      result = excel_metadata
      { excel: result }
    end

    private

    # Probes the downloaded blob; any StandardError is reported as
    # <tt>{ error: message }</tt> instead of propagating.
    def excel_metadata
      begin
        download_blob_to_tempfile { |file| probe(file) }
      rescue StandardError => error
        { error: error.message }
      end
    end
  end
end
89 changes: 89 additions & 0 deletions
89
gems/excel_analyzer/spec/excel_analyzer/xls_analyzer_spec.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# frozen_string_literal: true | ||
|
||
require "spec_helper" | ||
|
||
RSpec.describe ExcelAnalyzer::XlsAnalyzer do
  let(:xls_content_type) { described_class::CONTENT_TYPE }

  describe ".accept?" do
    subject { described_class.accept?(blob) }

    context "when the blob is an Excel file" do
      let(:blob) { fake_blob(content_type: xls_content_type) }

      it { is_expected.to eq true }
    end

    context "when the blob is not an Excel file" do
      let(:blob) { fake_blob(content_type: "text/plain") }

      it { is_expected.to eq false }
    end
  end

  describe "#metadata" do
    let(:metadata) { described_class.new(blob).metadata }

    context "when the blob is an Excel file with hidden data" do
      let(:blob) do
        fake_blob(io: fixture_file("suspect.xls"),
                  content_type: xls_content_type)
      end

      it "detects pivot cache" do
        expect(metadata[:excel][:pivot_cache]).to eq true
      end

      it "detects external links" do
        expect(metadata[:excel][:external_links]).to eq true
      end

      it "detects hidden rows" do
        expect(metadata[:excel][:hidden_rows]).to eq true
      end

      it "detects hidden columns" do
        expect(metadata[:excel][:hidden_columns]).to eq true
      end

      it "detects hidden sheets" do
        expect(metadata[:excel][:hidden_sheets]).to eq true
      end
    end

    context "when the blob is an Excel file without hidden data" do
      let(:blob) do
        fake_blob(io: fixture_file("data.xls"),
                  content_type: xls_content_type)
      end

      it "does not detect hidden data" do
        expect(metadata[:excel]).to eq(
          pivot_cache: false,
          external_links: false,
          hidden_rows: false,
          hidden_columns: false,
          hidden_sheets: false
        )
      end
    end

    context "when the blob is not an Excel file" do
      let(:blob) do
        fake_blob(io: fixture_file("plain.txt"), content_type: "text/plain")
      end

      it "returns an error metadata" do
        expect(metadata[:excel]).to eq(error: "LibreOffice conversion failed")
      end
    end
  end

  private

  # Builds a test double standing in for a blob: it reports the given content
  # type and yields +io+ whenever +open+ is called on it.
  def fake_blob(io: nil, content_type:)
    double(content_type: content_type).tap do |blob|
      allow(blob).to receive(:open).and_yield(io)
    end
  end

  # Opens the named file from the spec fixtures directory.
  def fixture_file(name)
    File.open(File.join(__dir__, "../fixtures/#{name}"))
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters