Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions datafusion-partitioned/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,45 @@

# Number of timed executions per query.
TRIES=3
# 1-based index of the query being processed; used to name per-query scratch files.
QUERY_NUM=1
# Private scratch directory for generated SQL and captured output.
# Abort if it cannot be created: with an empty TMP_DIR the per-query paths
# would resolve to the filesystem root.
TMP_DIR=$(mktemp -d) || { echo "failed to create temporary directory" >&2; exit 1; }
# Remove the scratch directory on every exit path.
trap 'rm -rf -- "${TMP_DIR}"' EXIT

echo $1
cat queries.sql | while read -r query; do
while read -r query; do
[ -z "$query" ] && continue

sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null

echo "$query" > /tmp/query.sql
QUERY_FILE="${TMP_DIR}/query-${QUERY_NUM}.sql"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is too much copying going on here, at least for my taste, and there are no code comments that explain the rationale. Please try to simplify.

# Sentinel string, unique per query, selected as the first statement of the
# generated file so its position can be found in the CLI output afterwards.
MARKER="clickbench_query_${QUERY_NUM}_start"
# Pass the marker as a printf argument instead of splicing it into the format string.
printf "SELECT '%s';\n\n" "${MARKER}" > "${QUERY_FILE}"
# Append one copy of the query per try so every try is in the same SQL file.
for i in $(seq 1 "$TRIES"); do
    printf '%s\n\n' "$query" >> "${QUERY_FILE}"
done

# Keep all tries in one process so DataFusion process-local caches stay hot.
# Use a marker query to ignore setup timings from create.sql.
# Per-query scratch files: ELAPSED_FILE receives one extracted timing per line,
# OUTPUT_FILE receives the combined stdout and stderr of the datafusion-cli run.
ELAPSED_FILE="${TMP_DIR}/elapsed-${QUERY_NUM}.txt"
OUTPUT_FILE="${TMP_DIR}/output-${QUERY_NUM}.txt"
# A single invocation is given both create.sql and the generated query file.
datafusion-cli -f create.sql "${QUERY_FILE}" > "${OUTPUT_FILE}" 2>&1
# Timing extraction: ignore all output until a line containing the marker is
# seen, then skip the first "Elapsed" line after it (presumably the timing of
# the marker SELECT itself) and print field 2 of every later "Elapsed" line.
awk -v marker="$MARKER" '
index($0, marker) { seen = 1 }
seen && /Elapsed/ {
if (!skipped_marker_elapsed) {
skipped_marker_elapsed = 1
next
}
print $2
}
' "${OUTPUT_FILE}" > "${ELAPSED_FILE}"
# Fewer extracted timings than tries: echo the non-timing output to stderr so
# whatever the CLI reported is visible to the caller.
if [ "$(wc -l < "${ELAPSED_FILE}")" -lt "$TRIES" ]; then
grep -v "Elapsed" "${OUTPUT_FILE}" >&2
fi

echo -n "["
for i in $(seq 1 $TRIES); do
# 1. there will be two query results: one for creating the table, another for executing the select statement
# 2. each result is followed by an "Elapsed ..." timing line; grep keeps only those lines
# 3. use tail to take the last of them
# 4. use awk to take the number we want
RES=$(datafusion-cli -f create.sql /tmp/query.sql 2>&1 | grep "Elapsed" |tail -1| awk '{ print $2 }')
RES=$(awk -v line="$i" 'NR == line { print; exit }' "${ELAPSED_FILE}")
[[ $RES != "" ]] && \
echo -n "$RES" || \
echo -n "null"
Expand All @@ -25,4 +50,4 @@ cat queries.sql | while read -r query; do
echo "],"

QUERY_NUM=$((QUERY_NUM + 1))
done
done < queries.sql
41 changes: 33 additions & 8 deletions datafusion/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,45 @@

# Number of timed executions per query.
TRIES=3
# 1-based index of the query being processed; used to name per-query scratch files.
QUERY_NUM=1
# Private scratch directory for generated SQL and captured output.
# Abort if it cannot be created: with an empty TMP_DIR the per-query paths
# would resolve to the filesystem root.
TMP_DIR=$(mktemp -d) || { echo "failed to create temporary directory" >&2; exit 1; }
# Remove the scratch directory on every exit path.
trap 'rm -rf -- "${TMP_DIR}"' EXIT

echo $1
cat queries.sql | while read -r query; do
while read -r query; do
[ -z "$query" ] && continue

sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null

echo "$query" > /tmp/query.sql
QUERY_FILE="${TMP_DIR}/query-${QUERY_NUM}.sql"
# Sentinel string, unique per query, selected as the first statement of the
# generated file so its position can be found in the CLI output afterwards.
MARKER="clickbench_query_${QUERY_NUM}_start"
# Pass the marker as a printf argument instead of splicing it into the format string.
printf "SELECT '%s';\n\n" "${MARKER}" > "${QUERY_FILE}"
# Append one copy of the query per try so every try is in the same SQL file.
for i in $(seq 1 "$TRIES"); do
    printf '%s\n\n' "$query" >> "${QUERY_FILE}"
done

# Keep all tries in one process so DataFusion process-local caches stay hot.
# Use a marker query to ignore setup timings from create.sql.
# Per-query scratch files: ELAPSED_FILE receives one extracted timing per line,
# OUTPUT_FILE receives the combined stdout and stderr of the datafusion-cli run.
ELAPSED_FILE="${TMP_DIR}/elapsed-${QUERY_NUM}.txt"
OUTPUT_FILE="${TMP_DIR}/output-${QUERY_NUM}.txt"
# A single invocation is given both create.sql and the generated query file.
datafusion-cli -f create.sql "${QUERY_FILE}" > "${OUTPUT_FILE}" 2>&1
# Timing extraction: ignore all output until a line containing the marker is
# seen, then skip the first "Elapsed" line after it (presumably the timing of
# the marker SELECT itself) and print field 2 of every later "Elapsed" line.
awk -v marker="$MARKER" '
index($0, marker) { seen = 1 }
seen && /Elapsed/ {
if (!skipped_marker_elapsed) {
skipped_marker_elapsed = 1
next
}
print $2
}
' "${OUTPUT_FILE}" > "${ELAPSED_FILE}"
# Fewer extracted timings than tries: echo the non-timing output to stderr so
# whatever the CLI reported is visible to the caller.
if [ "$(wc -l < "${ELAPSED_FILE}")" -lt "$TRIES" ]; then
grep -v "Elapsed" "${OUTPUT_FILE}" >&2
fi

echo -n "["
for i in $(seq 1 $TRIES); do
# 1. there will be two query results: one for creating the table, another for executing the select statement
# 2. each result is followed by an "Elapsed ..." timing line; grep keeps only those lines
# 3. use tail to take the last of them
# 4. use awk to take the number we want
RES=$(datafusion-cli -f create.sql /tmp/query.sql 2>&1 | grep "Elapsed" |tail -1 | awk '{ print $2 }')
RES=$(awk -v line="$i" 'NR == line { print; exit }' "${ELAPSED_FILE}")
[[ $RES != "" ]] && \
echo -n "$RES" || \
echo -n "null"
Expand All @@ -25,4 +50,4 @@ cat queries.sql | while read -r query; do
echo "],"

QUERY_NUM=$((QUERY_NUM + 1))
done
done < queries.sql